AWS-Textract-Key-Value-Pair Java - thread “main” java.lang.NullPointerException

前端 未结 1 451
旧时难觅i
旧时难觅i 2020-12-12 06:56

I am using AWS Textract in a Java Spring Boot project. I have set up the AWS CLI and added the AWS SDK as a Maven dependency.

I have written Java code, converted from C# in or

相关标签:
1条回答
  • 2020-12-12 07:17

    I am sure other Java developers will appreciate this code. I answered my own question with the help of Rikus.

        package ai.tautona.lloyds.mailboxprocessor.service;
    import com.amazonaws.services.textract.AmazonTextract;
    import com.amazonaws.services.textract.AmazonTextractClientBuilder;
    import com.amazonaws.services.textract.model.Document;
    import java.nio.file.Files;
    import com.amazonaws.services.textract.model.*;
    import org.apache.commons.collections.CollectionUtils;
    import org.springframework.stereotype.Service;
    import org.springframework.transaction.annotation.Transactional;
    import javax.validation.constraints.NotNull;
    import java.io.File;
    import java.io.IOException;
    import java.nio.ByteBuffer;
    import java.util.*;
    
    
    /**
     * Extracts form key/value pairs from a document image with Amazon Textract.
     *
     * <p>Textract returns a flat list of {@code Block}s. Blocks of type
     * {@code KEY_VALUE_SET} are either keys or values; a key block points at its
     * value block through a {@code VALUE} relationship, and both key and value
     * blocks point at the words / selection elements that make up their text
     * through {@code CHILD} relationships.
     *
     * <p>All lookups are null-tolerant: a block without relationships, a
     * relationship without ids, or an id that is not present in the result is
     * skipped instead of raising a {@link NullPointerException}.
     */
    @Service
    @Transactional
    public class AWSTextractService {

        private static final String BLOCK_KEY_VALUE_SET = "KEY_VALUE_SET";
        private static final String BLOCK_WORD = "WORD";
        private static final String BLOCK_SELECTION_ELEMENT = "SELECTION_ELEMENT";
        private static final String ENTITY_KEY = "KEY";
        private static final String RELATIONSHIP_VALUE = "VALUE";
        private static final String RELATIONSHIP_CHILD = "CHILD";
        private static final String STATUS_SELECTED = "SELECTED";

        /** File analysed by {@link #main(String[])} when no argument is given. */
        private static final String DEFAULT_SAMPLE_FILE =
            "/home/daniel/Documents/atrium_sources/accordImage-1.png";

        /**
         * Sends a local file to Textract (FORMS analysis) and prints every
         * detected key/value pair to standard output.
         *
         * @param localFile path of the image/document to analyse
         * @throws IOException if the file cannot be read
         */
        public static void getKVMap(String localFile) throws IOException {
            byte[] fileContent = Files.readAllBytes(new File(localFile).toPath());

            AnalyzeDocumentRequest request = new AnalyzeDocumentRequest()
                .withDocument(new Document().withBytes(ByteBuffer.wrap(fileContent)))
                .withFeatureTypes(FeatureType.FORMS);

            // The client is created for this call only, so release it once the
            // response has been received.
            AmazonTextract client = AmazonTextractClientBuilder.defaultClient();
            AnalyzeDocumentResult result;
            try {
                result = client.analyzeDocument(request);
            } finally {
                client.shutdown();
            }

            List<Block> blocks = result.getBlocks();
            if (blocks == null) {
                blocks = Collections.emptyList();
            }

            // Split the KEY_VALUE_SET blocks into keys and values.
            List<Block> key_map = new ArrayList<>();
            List<Block> value_map = new ArrayList<>();
            for (Block block : blocks) {
                if (!BLOCK_KEY_VALUE_SET.equals(block.getBlockType())) {
                    continue;
                }
                List<String> entityTypes = block.getEntityTypes();
                if (entityTypes != null && entityTypes.contains(ENTITY_KEY)) {
                    key_map.add(block);
                } else {
                    value_map.add(block);
                }
            }

            getKVMapRelationship(key_map, value_map, blocks)
                .forEach((k, v) -> System.out.println("key: " + k + " value:" + v));
        }

        /**
         * Builds the key-text to value-text map for the given key blocks.
         *
         * <p>If two key blocks produce the same text, the later one overwrites
         * the earlier one.
         *
         * @param key_map   KEY_VALUE_SET blocks whose entity type is KEY
         * @param value_map the remaining KEY_VALUE_SET blocks (values)
         * @param block_map every block of the Textract result, used to resolve child ids
         * @return map from key text to value text; never {@code null}
         * @throws IOException never thrown by this implementation; kept for interface compatibility
         */
        @NotNull
        public static HashMap<String, String> getKVMapRelationship(List<Block> key_map, List<Block> value_map, List<Block> block_map) throws IOException {
            // Index once so each child id is resolved in constant time instead
            // of rescanning the whole block list.
            Map<String, Block> blocksById = indexById(block_map);

            HashMap<String, String> kvs = new HashMap<>();
            for (Block key_block : key_map) {
                Block value_block = Find_value_block(key_block, value_map);
                kvs.put(collectText(key_block, blocksById), collectText(value_block, blocksById));
            }
            return kvs;
        }

        /**
         * Finds the value block referenced by a key block's VALUE relationship.
         *
         * @param block     the key block
         * @param value_map candidate value blocks
         * @return the last matching value block, or an empty {@code Block} when the
         *         key has no relationships or no candidate matches; never {@code null}
         */
        @NotNull
        public static Block Find_value_block(Block block, List<Block> value_map) {
            Block value_block = new Block();
            if (block == null || block.getRelationships() == null || value_map == null) {
                return value_block;
            }
            for (Relationship relationship : block.getRelationships()) {
                if (relationship == null
                    || !RELATIONSHIP_VALUE.equals(relationship.getType())
                    || relationship.getIds() == null) {
                    continue;
                }
                for (String value_id : relationship.getIds()) {
                    if (value_id == null) {
                        continue;
                    }
                    for (Block value : value_map) {
                        if (value != null && value_id.equals(value.getId())) {
                            value_block = value;
                        }
                    }
                }
            }
            return value_block;
        }

        /**
         * Concatenates the text of a block's CHILD words and selection elements.
         *
         * <p>Each WORD contributes its text followed by a space; each SELECTED
         * selection element contributes {@code "X "}. Child ids that cannot be
         * resolved in {@code block_map} are skipped.
         *
         * @param result    the key or value block whose text is wanted (may be {@code null})
         * @param block_map every block of the Textract result
         * @return the assembled text, or an empty string when there is nothing to assemble
         * @throws IOException never thrown by this implementation; kept for interface compatibility
         */
        @NotNull
        public static String Get_text(Block result, List<Block> block_map) throws IOException {
            return collectText(result, indexById(block_map));
        }

        /** Maps block id to block, keeping the first block seen for a duplicated id. */
        private static Map<String, Block> indexById(List<Block> blocks) {
            Map<String, Block> byId = new HashMap<>();
            if (blocks == null) {
                return byId;
            }
            for (Block candidate : blocks) {
                if (candidate != null && candidate.getId() != null) {
                    byId.putIfAbsent(candidate.getId(), candidate);
                }
            }
            return byId;
        }

        /** Assembles the CHILD text of {@code block} using a pre-built id index. */
        private static String collectText(Block block, Map<String, Block> blocksById) {
            if (block == null || block.getRelationships() == null) {
                return "";
            }
            StringBuilder text = new StringBuilder();
            for (Relationship relationship : block.getRelationships()) {
                if (relationship == null
                    || !RELATIONSHIP_CHILD.equals(relationship.getType())
                    || relationship.getIds() == null) {
                    continue;
                }
                for (String id : relationship.getIds()) {
                    Block child = blocksById.get(id);
                    if (child == null) {
                        // Id not present in the result: nothing to append.
                        continue;
                    }
                    if (BLOCK_WORD.equals(child.getBlockType())) {
                        text.append(child.getText()).append(' ');
                    } else if (BLOCK_SELECTION_ELEMENT.equals(child.getBlockType())
                        && STATUS_SELECTED.equals(child.getSelectionStatus())) {
                        text.append("X ");
                    }
                }
            }
            return text.toString();
        }

        /**
         * Command-line entry point.
         *
         * @param args optional: {@code args[0]} is the file to analyse; when absent
         *             the built-in sample path is used
         * @throws IOException if the file cannot be read
         */
        public static void main(String[] args) throws IOException {
            String fileStr = (args != null && args.length > 0) ? args[0] : DEFAULT_SAMPLE_FILE;

            AWSTextractService.getKVMap(fileStr);

            System.out.println("Done!");
        }

    }

    0 讨论(0)
提交回复
热议问题