1

我在一个 Java Spring Boot 项目中使用 AWS Textract。我已经配置好了 AWS CLI,并将 AWS SDK 添加为 Maven 依赖项。

我把一段 C# 代码转换成了 Java,用来提取键值对;在成功提取出一些单词之后,收到了以下错误:

AGENCYCUSTOMERID:FEIN(如果适用)MARITALSTATUS/CIVILUNION(如果适用)保险地点代码BUSPRIMARYE-MAILADDRESS:FEIN(如果适用)LINEOFBUSINESSCELLMARITALSTATUScivilUNION(如果适用)CELLCELLHOME ”:

AGENCYCUSTOMERID:FEIN(ifapplicable)MARITALSTATUS/CIVILUNION(ifapplicable)INSUREDLOCATIONCODEBUSPRIMARYE-MAILADDRESS:FEIN(ifapplicable)LINEOFBUSINESSCELLMARITALSTATUScivilUNION(ifapplicable)CELLCELLHOMEException in thread "main" java.lang.NullPointerException
at ai.tautona.lloyds.mailboxprocessor.service.AWSTextractService.Get_text(AWSTextractService.java:112)
at ai.tautona.lloyds.mailboxprocessor.service.AWSTextractService.getKVMapRelationship(AWSTextractService.java:74)
at ai.tautona.lloyds.mailboxprocessor.service.AWSTextractService.getKVMap(AWSTextractService.java:57)
at ai.tautona.lloyds.mailboxprocessor.service.AWSTextractService.main(AWSTextractService.java:148)

通过调试,我发现导致错误的行是:

   text += "X ";

似乎是在遇到 SELECTION_ELEMENT(复选框)类型的块之后失败的?

我的代码:

 /**
  * Question listing: sends a local image to Amazon Textract (FORMS feature) and
  * tries to print the form's key/value pairs.
  *
  * NOTE(review): this is the failing version discussed in the surrounding text.
  * The comments below flag the spots that look wrong; the code itself is left
  * exactly as posted.
  */
 public class AWSTextractService {


/**
 * Reads {@code localFile}, runs Textract AnalyzeDocument on its bytes, splits
 * the returned blocks into KEY and VALUE blocks, and prints the resulting map.
 *
 * @param localFile path of the document image to analyze
 * @throws IOException if the file cannot be read
 */
public static void getKVMap(String localFile) throws IOException {

    File file = new File(localFile);
    byte[] fileContent = Files.readAllBytes(file.toPath());
    AmazonTextract client = AmazonTextractClientBuilder.defaultClient();

    AnalyzeDocumentRequest request = new AnalyzeDocumentRequest()
        .withDocument(new Document()
            .withBytes(ByteBuffer.wrap(fileContent))).withFeatureTypes(FeatureType.FORMS);


    AnalyzeDocumentResult result = client.analyzeDocument(request);


    //Get the text blocks
    List<Block> blocks = result.getBlocks();

    //get key and value maps
    List<Block> key_map = new ArrayList<>();
    List<Block> value_map = new ArrayList<>();
    // block_map ends up holding every returned block; it is used later to resolve child ids.
    List<Block> block_map = new ArrayList<>();

    for (Block block : blocks) {
        block_map.add(block);
        if (block.getBlockType().equals("KEY_VALUE_SET")) {
            // A KEY_VALUE_SET block whose entity types do not contain "KEY" is treated as a value block.
            if (block.getEntityTypes().contains("KEY")) {
                key_map.add(block);
            } else {
                value_map.add(block);
            }

        }

    }

    //Get Key Value relationship
    getKVMapRelationship(key_map, value_map, block_map).forEach((k, v) -> System.out.println("key: " + k + " value:" + v));

   // NOTE(review): nothing named getKeyValueRelationship is declared in this class as shown,
   // so this line would not compile; it looks like a leftover duplicate of the call above.
   getKeyValueRelationship.forEach((k,v)-> System.out.println("key: "+k+" value:"+v));


}


/**
 * Walks the key blocks, looks up each key's value block, and extracts the text of both.
 *
 * NOTE(review): the extracted key/val are not what gets stored; see the comments in the loop.
 *
 * @param key_map   blocks classified as keys
 * @param value_map blocks classified as values
 * @param block_map all blocks, used to resolve child ids to text
 * @return the map built in the loop below
 */
@NotNull
public static HashMap<String, String> getKVMapRelationship(List<Block> key_map, List<Block> value_map, List<Block> block_map) throws IOException {
    HashMap<String, String> kvs = new HashMap<>();
    ;
    Block value_block;
    String key, val = "";
    for (Block key_block : key_map) {
        value_block = Find_value_block(key_block, value_map);
        key = Get_text(key_block, block_map);
        val = Get_text(value_block, block_map);
        // NOTE(review): printf treats `key` as the format string and `val` as its argument,
        // so a key containing '%' would be interpreted as a format specifier.
        System.out.printf(key, val);
        // NOTE(review): stores the literal pair "1" -> "2" on every iteration instead of key -> val,
        // so the returned map never holds more than this one entry.
        kvs.put("1", "2");
    }

    return kvs;

}

/**
 * Finds the value block referenced by a key block's "VALUE" relationships.
 *
 * @param block     the key block whose relationships are scanned
 * @param value_map candidate value blocks, matched by id
 * @return the last matching value block, or a freshly constructed empty {@code Block}
 *         when no id matches
 */
@NotNull
public static Block Find_value_block(Block block, List<Block> value_map) {
    // Fallback returned when no VALUE relationship id matches any block in value_map.
    Block value_block = new Block();
    for (Relationship relationship : block.getRelationships()) {
        if (relationship.getType().equals("VALUE")) {
            for (String value_id : relationship.getIds()) {

                for (Block value : value_map) {
                    if (value.getId().equals(value_id)) {
                        // No break: a later match overwrites an earlier one.
                        value_block = value;
                    }

                }

            }

        }

    }
    return value_block;

}

/**
 * Concatenates the text of the WORD children of {@code result}; a SELECTION_ELEMENT child
 * whose status is "SELECTED" contributes "X ".
 *
 * NOTE(review): the stack trace in the question points into this method. Two dereferences
 * here can hit null; see the inline notes.
 *
 * @param result    block whose "CHILD" relationships are followed
 * @param block_map all blocks, searched by id for each child
 * @return the accumulated text (words are appended with no separator between them)
 */
@NotNull
public static String Get_text(Block result, List<Block> block_map) throws IOException {
    String text = "";
    Block word = new Block();
    Block word2 = null;
    // NOTE(review): if getRelationships() returns null for a block without relationships
    // (e.g. the empty fallback Block from Find_value_block), this call would throw a
    // NullPointerException. Confirm against the SDK.
    if (result.getRelationships().stream().count() > 0) {
        for (Relationship relationship : result.getRelationships()) {
            if (relationship.getType().equals("CHILD")) {
                for (String child_id : relationship.getIds()) {

                    // word2 is null, so `word` becomes null when no block has this child id.
                    word = block_map.stream()
                        .filter((x)-> x.getId().equals(child_id)).findFirst().orElse(word2);


                    // A null `word` from the lookup above would throw here.
                    if (word.getBlockType().equals("WORD"))
                    {
                        // Appends the word followed by an empty string, i.e. without a space separator.
                        text += (word.getText() ==null ? "" : word.getText()) + "";
                    }
                    if (word.getBlockType().equals("SELECTION_ELEMENT"))

                    {
                        if(word.getSelectionStatus().equals("SELECTED"))

                        {
                            text += "X ";

                        }
                    }
                }
            }
        }

    }

    return text;

}
/**
 * Runs the extraction against a fixed local sample image and prints "Done!" afterwards.
 */
public static void main (String[]args) throws IOException {

    String fileStr = "/home/daniel/Documents/atrium_sources/accordImage-1.png";

    AWSTextractService.getKVMap(fileStr);

    System.out.println("Done!");
}

}

我不确定是什么问题?

4

1 回答 1

3

我相信其他 Java 开发人员也会用得上这段代码。在 Rikus 的帮助下,我解决了自己的问题。

    package ai.tautona.lloyds.mailboxprocessor.service;
import com.amazonaws.services.textract.AmazonTextract;
import com.amazonaws.services.textract.AmazonTextractClientBuilder;
import com.amazonaws.services.textract.model.Document;
import java.nio.file.Files;
import com.amazonaws.services.textract.model.*;
import org.apache.commons.collections.CollectionUtils;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import javax.validation.constraints.NotNull;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.*;


/**
 * Extracts form key/value pairs from a document image using Amazon Textract's
 * AnalyzeDocument operation with the FORMS feature.
 *
 * <p>Textract returns a flat list of blocks. KEY_VALUE_SET blocks are either keys
 * (entity type "KEY") or values; a key points at its value through a "VALUE"
 * relationship, and both point at their WORD / SELECTION_ELEMENT children through
 * "CHILD" relationships. The methods below follow those links.
 */
@Service
@Transactional
public class AWSTextractService {

    private static final String BLOCK_KEY_VALUE_SET = "KEY_VALUE_SET";
    private static final String BLOCK_WORD = "WORD";
    private static final String BLOCK_SELECTION_ELEMENT = "SELECTION_ELEMENT";
    private static final String ENTITY_KEY = "KEY";
    private static final String RELATIONSHIP_VALUE = "VALUE";
    private static final String RELATIONSHIP_CHILD = "CHILD";
    private static final String STATUS_SELECTED = "SELECTED";

    /** Sample image used by {@link #main(String[])} when no path is given on the command line. */
    private static final String DEFAULT_SAMPLE_FILE =
        "/home/daniel/Documents/atrium_sources/accordImage-1.png";

    /**
     * Analyzes a local document image and prints every detected key/value pair to standard output.
     *
     * @param localFile path of the image to analyze
     * @throws IOException if the file cannot be read
     */
    public static void getKVMap(String localFile) throws IOException {
        byte[] fileContent = Files.readAllBytes(new File(localFile).toPath());

        AnalyzeDocumentRequest request = new AnalyzeDocumentRequest()
            .withDocument(new Document().withBytes(ByteBuffer.wrap(fileContent)))
            .withFeatureTypes(FeatureType.FORMS);

        // The client is created per call, so release its resources once the request is done.
        AmazonTextract client = AmazonTextractClientBuilder.defaultClient();
        List<Block> blocks;
        try {
            blocks = client.analyzeDocument(request).getBlocks();
        } finally {
            client.shutdown();
        }
        if (blocks == null) {
            blocks = Collections.emptyList();
        }

        // Split the KEY_VALUE_SET blocks into keys and values.
        List<Block> keyBlocks = new ArrayList<>();
        List<Block> valueBlocks = new ArrayList<>();
        for (Block block : blocks) {
            if (!BLOCK_KEY_VALUE_SET.equals(block.getBlockType())) {
                continue;
            }
            List<String> entityTypes = block.getEntityTypes();
            if (entityTypes != null && entityTypes.contains(ENTITY_KEY)) {
                keyBlocks.add(block);
            } else {
                valueBlocks.add(block);
            }
        }

        getKVMapRelationship(keyBlocks, valueBlocks, blocks)
            .forEach((k, v) -> System.out.println("key: " + k + " value:" + v));
    }

    /**
     * Builds the key-text to value-text map for the given key blocks.
     *
     * <p>The result is a {@link LinkedHashMap}, so iteration follows the order of
     * {@code key_map}. If two keys have the same text, the later one overwrites the earlier.
     *
     * @param key_map   blocks classified as keys
     * @param value_map blocks classified as values
     * @param block_map all blocks of the document, used to resolve child ids
     * @return map from key text to value text; empty text when a key has no value
     */
    @NotNull
    public static HashMap<String, String> getKVMapRelationship(List<Block> key_map, List<Block> value_map, List<Block> block_map) throws IOException {
        // Index once so each id lookup is O(1) instead of a scan over every block.
        Map<String, Block> blocksById = indexById(block_map);
        Map<String, Block> valuesById = indexById(value_map);

        HashMap<String, String> kvs = new LinkedHashMap<>();
        for (Block keyBlock : key_map) {
            Block valueBlock = findValueBlock(keyBlock, valuesById);
            kvs.put(collectText(keyBlock, blocksById), collectText(valueBlock, blocksById));
        }
        return kvs;
    }

    /**
     * Finds the value block that a key block references through its "VALUE" relationship.
     *
     * @param block     the key block
     * @param value_map candidate value blocks
     * @return the referenced value block, or an empty {@code Block} when none is found
     */
    @NotNull
    public static Block Find_value_block(Block block, List<Block> value_map) {
        return findValueBlock(block, indexById(value_map));
    }

    /**
     * Concatenates the text of a block's children: each WORD contributes its text followed
     * by a space, and each selected SELECTION_ELEMENT contributes {@code "X "}.
     *
     * <p>Children that cannot be resolved in {@code block_map}, and words without text,
     * are skipped rather than aborting the extraction.
     *
     * @param result    block whose "CHILD" relationships are followed; may be {@code null}
     * @param block_map all blocks of the document, used to resolve child ids
     * @return the collected text, or an empty string when there is nothing to collect
     */
    @NotNull
    public static String Get_text(Block result, List<Block> block_map) throws IOException {
        return collectText(result, indexById(block_map));
    }

    /**
     * Entry point for a manual run.
     *
     * @param args optional: {@code args[0]} is the image path; the bundled sample path is
     *             used when it is absent
     */
    public static void main(String[] args) throws IOException {
        String fileStr = args != null && args.length > 0 ? args[0] : DEFAULT_SAMPLE_FILE;

        AWSTextractService.getKVMap(fileStr);

        System.out.println("Done!");
    }

    /** Maps block id to block; the first block seen for an id wins, null entries are skipped. */
    private static Map<String, Block> indexById(List<Block> blocks) {
        Map<String, Block> byId = new HashMap<>();
        if (blocks == null) {
            return byId;
        }
        for (Block block : blocks) {
            if (block != null && block.getId() != null) {
                byId.putIfAbsent(block.getId(), block);
            }
        }
        return byId;
    }

    /** Resolves a key block's "VALUE" relationship ids against the indexed value blocks. */
    private static Block findValueBlock(Block keyBlock, Map<String, Block> valuesById) {
        Block found = new Block();
        if (keyBlock == null || keyBlock.getRelationships() == null) {
            return found;
        }
        for (Relationship relationship : keyBlock.getRelationships()) {
            if (!RELATIONSHIP_VALUE.equals(relationship.getType()) || relationship.getIds() == null) {
                continue;
            }
            for (String valueId : relationship.getIds()) {
                Block candidate = valuesById.get(valueId);
                if (candidate != null) {
                    found = candidate;
                }
            }
        }
        return found;
    }

    /** Gathers the WORD / selected SELECTION_ELEMENT text of a block's children. */
    private static String collectText(Block block, Map<String, Block> blocksById) {
        if (block == null || block.getRelationships() == null) {
            return "";
        }
        StringBuilder text = new StringBuilder();
        for (Relationship relationship : block.getRelationships()) {
            if (!RELATIONSHIP_CHILD.equals(relationship.getType()) || relationship.getIds() == null) {
                continue;
            }
            for (String childId : relationship.getIds()) {
                Block child = blocksById.get(childId);
                if (child == null) {
                    // Unknown child id: skip it and keep the rest of the text.
                    continue;
                }
                // Constant-first equals keeps a missing block type or status from throwing.
                if (BLOCK_WORD.equals(child.getBlockType())) {
                    if (child.getText() != null) {
                        text.append(child.getText()).append(' ');
                    }
                } else if (BLOCK_SELECTION_ELEMENT.equals(child.getBlockType())
                    && STATUS_SELECTED.equals(child.getSelectionStatus())) {
                    text.append("X ");
                }
            }
        }
        return text.toString();
    }
}

于 2020-06-21T13:55:26.200 回答