4

我在 Eclipse 中的动态 Web 项目中工作,我制作了一个 TesseractOCR 类,其中包含:

/**
 * Runs Tess4J OCR (Hebrew + English) on an image file and extracts the text
 * that lies between two fixed Hebrew section markers in the recognised output.
 */
public class TesseractOCR {

    /** Marker that immediately precedes the section to extract. */
    private static final String START_MARKER = "אבחנות";

    /** Marker that immediately follows the section to extract. */
    private static final String END_MARKER = "הפניות";

    public TesseractOCR()
    {
    }

    /**
     * OCRs the given image and returns the text found between the start and
     * end markers.
     *
     * @param file path of the image file to recognise
     * @return the text between the two markers (without the single character
     *         directly before the end marker), or an empty string when the
     *         path is null/empty, OCR throws an exception, or the markers are
     *         not both present in that order
     */
    public String doOCR(String file)
    {
        if (file == null || file.isEmpty()) {
            return "";
        }

        // Select the native library folder matching the JVM's data model.
        System.setProperty("jna.library.path", "32".equals(System.getProperty("sun.arch.data.model")) ? "lib/win32-x86" : "lib/win32-x86-64");

        // Use the caller-supplied path; previously a hard-coded image was always read.
        File imageFile = new File(file);
        Tesseract instance = Tesseract.getInstance();  // JNA Interface Mapping
        instance.setLanguage("heb+eng");
        // NOTE(review): no data path is configured here. If the tessdata folder
        // with the heb/eng .traineddata files is not found by default, consider
        // calling instance.setDatapath(...) with its location - TODO confirm.

        String sub = "";
        try {
            String result = instance.doOCR(imageFile);
            sub = extractSection(result);
            System.out.println(sub);
        } catch (Exception e) {
            // Best effort: report the failure and fall through to return "".
            System.err.println("OCR failed for " + file + ": " + e);
        }

        return sub;
    }

    /**
     * Returns the text between {@link #START_MARKER} and the first
     * {@link #END_MARKER} that follows it, dropping the one character directly
     * before the end marker. Returns an empty string when either marker is
     * missing or nothing lies between them.
     */
    private static String extractSection(String text)
    {
        if (text == null) {
            return "";
        }
        int markerStart = text.indexOf(START_MARKER);
        if (markerStart < 0) {
            return "";
        }
        int begin = markerStart + START_MARKER.length();
        int markerEnd = text.indexOf(END_MARKER, begin);
        if (markerEnd < 0) {
            return "";
        }
        int end = markerEnd - 1;
        if (end < begin) {
            return "";
        }
        return text.substring(begin, end);
    }
}

另外,还有一个包含 doPost() 方法的 servlet:

/**
 * Handles a multipart upload: stores the part named "file" under a fixed
 * directory, reports the outcome in the HTML response and, only when the file
 * was stored, runs OCR on it and prints the extracted text to standard output.
 *
 * @param request  the multipart request expected to carry a "file" part
 * @param response receives a short HTML status message
 * @throws ServletException if the request parts cannot be read
 * @throws IOException      if reading the upload or writing the file fails
 */
protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

    System.setProperty("jna.library.path", "32".equals(System.getProperty("sun.arch.data.model")) ? "lib/win32-x86" : "lib/win32-x86-64");

    response.setContentType("text/html;charset=UTF-8");

    // Create path components to save the file
    final String path = "C:\\Users\\Sherein Dabbah\\Desktop\\med"; //request.getParameter("destination");
    final Part filePart = request.getPart("file");
    final PrintWriter writer = response.getWriter();

    // The submitted name is client-controlled: keep only its last path
    // component so it cannot point outside the upload directory.
    final String submittedName = (filePart == null) ? null : filePart.getSubmittedFileName();
    final String fileName = (submittedName == null) ? "" : new File(submittedName).getName();

    if (fileName.isEmpty() || ".".equals(fileName) || "..".equals(fileName)) {
        writer.println("You either did not specify a file to upload or are "
                + "trying to upload a file to a protected or nonexistent "
                + "location.");
        return;
    }

    final String fullName = path + File.separator + fileName;
    boolean saved = false;

    try (InputStream filecontent = filePart.getInputStream();
         OutputStream out = new FileOutputStream(new File(fullName))) {

        final byte[] bytes = new byte[1024];
        int read;
        while ((read = filecontent.read(bytes)) != -1) {
            out.write(bytes, 0, read);
        }
        saved = true;

        // The response is HTML, so escape the client-supplied name.
        writer.println("New file " + escapeHtml(fileName) + " created at " + path);
        LOGGER.log(Level.INFO, "File {0} being uploaded to {1}",
                new Object[]{fileName, path});

    } catch (FileNotFoundException fne) {
        writer.println("You either did not specify a file to upload or are "
                + "trying to upload a file to a protected or nonexistent "
                + "location.");
        writer.println("<br/> ERROR: " + escapeHtml(fne.getMessage()));

        LOGGER.log(Level.SEVERE, "Problems during file upload. Error: {0}",
                new Object[]{fne.getMessage()});
    } finally {
        // Close the response writer before the OCR step, as before.
        writer.close();
    }

    // Only OCR a file that was actually written.
    if (saved) {
        String s = new TesseractOCR().doOCR(fullName);
        System.out.println(s);
    }
}

/** Escapes the characters that are significant in HTML text and attribute values. */
private static String escapeHtml(String text) {
    final String value = String.valueOf(text);
    final StringBuilder escaped = new StringBuilder(value.length());
    for (int i = 0; i < value.length(); i++) {
        final char c = value.charAt(i);
        switch (c) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            case '"':
                escaped.append("&quot;");
                break;
            case '\'':
                escaped.append("&#39;");
                break;
            default:
                escaped.append(c);
                break;
        }
    }
    return escaped.toString();
}

我遇到了一个异常:

   Sep 06, 2015 10:36:46 AM org.apache.catalina.core.StandardWrapperValve invoke
     SEVERE: Servlet.service() for servlet [servlets.UploadServlet] in context   with path [/up] threw exception [Servlet execution threw an exception] with root      cause
    java.lang.Error: Invalid memory access
    at com.sun.jna.Native.invokePointer(Native Method)
    at com.sun.jna.Function.invokePointer(Function.java:470)
    at com.sun.jna.Function.invoke(Function.java:404)
    at com.sun.jna.Function.invoke(Function.java:315)
    at com.sun.jna.Library$Handler.invoke(Library.java:212)
    at com.sun.proxy.$Proxy4.TessBaseAPIGetUTF8Text(Unknown Source)
    at net.sourceforge.tess4j.Tesseract.getOCRText(Unknown Source)
    at net.sourceforge.tess4j.Tesseract.doOCR(Unknown Source)
    at net.sourceforge.tess4j.Tesseract.doOCR(Unknown Source)
    at net.sourceforge.tess4j.Tesseract.doOCR(Unknown Source)
    at classes.TesseractOCR.doOCR(TesseractOCR.java:28)
    at servlets.UploadServlet.doPost(UploadServlet.java:111) 
    at...

它在 TesseractOCR 类中的这一行失败:

String result = instance.doOCR(imageFile);
4

2 回答 2

7

您可能需要调用 setDatapath,告诉它在哪里可以找到包含 .traineddata 文件的 tessdata 文件夹。

此外,您可能不再需要设置jna.library.path变量,因为 tess4j 现在可以自动提取和加载本机库。

于 2015-09-06T17:24:23.023 回答
0

在这种情况下,语言的选择也很重要—— 我用 lang=hin+eng 处理图像,但它给出了同样的错误(在这篇文章中提到)

由于图像中的英文文本较少,所以我更改了 lang=hin 并得到了预期的结果。

/**
 * Demo entry point: builds a Tesseract instance for the "hin" language from a
 * fixed tessdata folder, OCRs one fixed image and logs the recognised text.
 */
public static void main(String[] args) {
    final String tessDataDir = "C:/Program Files (x86)/Tesseract-OCR/tessdata/";
    final String language = "hin";
    Tesseract tesseract = new ReadImageText().getTesseractInstance(tessDataDir, language);

    try {
        File image = new File("C:/EA/app-result/im/01-001/34/0.png");
        String text = tesseract.doOCR(image);
        log.info("resultText {}", text);
    } catch (TesseractException ocrFailure) {
        // OCR failed: dump the stack trace and finish normally.
        ocrFailure.printStackTrace();
    }
}
于 2018-05-18T03:55:30.547 回答