我正在尝试使用 tess4j 识别一些屏幕截图的文本,但输出中有许多字母错误或缺少单词。这里的代码:
package duote;
import java.awt.Graphics2D;
import java.awt.Image;
import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import javax.imageio.ImageIO;
import net.sourceforge.tess4j.ITessAPI;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.Word;
public class Test
{
public static void main(String[] args) throws IOException
{
String imageUrl = "C:/Users/alexc/eclipse-workspace/duote/src/images/apple.png";
File imageFile = new File(imageUrl);
Image image = ImageIO.read(imageFile);
BufferedImage bi = toBufferedImage(image);
ITesseract instance = new Tesseract();
for(Word word : instance.getWords(bi, ITessAPI.TessPageIteratorLevel.RIL_WORD))
{
Rectangle rect = word.getBoundingBox();
System.out.println(rect.getMinX()+","+rect.getMaxX()+","+rect.getMinY()+","+rect.getMaxY()
+": "+word.getText());
}
}
public static BufferedImage toBufferedImage(Image img)
{
if (img instanceof BufferedImage)
{
return (BufferedImage) img;
}
// Create a buffered image with transparency
BufferedImage bimage = new BufferedImage(img.getWidth(null), img.getHeight(null), BufferedImage.TYPE_INT_ARGB);
// Draw the image on to the buffered image
Graphics2D bGr = bimage.createGraphics();
bGr.drawImage(img, 0, 0, null);
bGr.dispose();
// Return the buffered image
return bimage;
}
}
我尝试使用其他屏幕截图,有时缺少单词。有没有办法更准确或者我错了?谢谢。
输出:
185.0,200.0,8.0,26.0: 6
277.0,303.0,14.0,24.0: Mac
379.0,406.0,13.0,24.0: iPad
482.0,525.0,13.0,24.0: iPhcne
602.0,642.0,14.0,24.0: Watch
718.0,735.0,14.0,24.0: TV
812.0,849.0,14.0,24.0: Mus":
926.0,977.0,14.0,27.0: Supparl
1053.0,1071.0,10.0,27.0: Q
1147.0,1164.0,7.0,27.0: C]
514.0,523.0,96.0,104.0: .
514.0,681.0,97.0,138.0: Phone
694.0,738.0,97.0,137.0: 11
758.0,837.0,97.0,138.0: Pro
426.0,464.0,159.0,180.0: Pro
470.0,577.0,164.0,180.0: cameras.
584.0,622.0,159.0,180.0: Pro
628.0,716.0,159.0,185.0: display.
722.0,761.0,159.0,180.0: Pro
767.0,923.0,159.0,184.0: performance.
521.0,559.0,198.0,210.0: From
564.0,652.0,197.0,213.0: £30.99/m0.
658.0,673.0,201.0,210.0: or
678.0,718.0,198.0,210.0: £759
723.0,754.0,197.0,210.0: with
759.0,829.0,197.0,210.0: lrade-in.‘
578.0,691.0,236.0,252.0: Leammore)
724.0,771.0,236.0,255.0: Buy)
716.0,728.0,278.0,294.0: -4
500.0,554.0,344.0,365.0: At:
587.0,623.0,346.0,366.0: ?g;
663.0,699.0,344.0,365.0: w},
719.0,833.0,344.0,370.0: w‘w?’?.
851.0,852.0,354.0,357.0: ‘
869.0,887.0,349.0,367.0: f
408.0,414.0,378.0,382.0: H
456.0,497.0,367.0,386.0: 1
844.0,907.0,358.0,386.0: LA],
961.0,980.0,366.0,379.0: “a
386.0,397.0,385.0,398.0: y
417.0,472.0,371.0,427.0: g
795.0,797.0,396.0,398.0: .
831.0,854.0,386.0,396.0: f
866.0,888.0,383.0,399.0: M
906.0,924.0,390.0,406.0: gr
942.0,958.0,379.0,399.0: m“
413.0,418.0,399.0,408.0: ‘
856.0,878.0,406.0,418.0: A;
887.0,913.0,403.0,419.0: ;»
919.0,958.0,402.0,421.0: mi"
964.0,1003.0,403.0,419.0: 1&4‘
402.0,464.0,425.0,443.0: £4.43
488.0,553.0,400.0,469.0: 0
850.0,970.0,416.0,440.0: ”nffv’??'ér’
991.0,993.0,423.0,426.0: ‘
331.0,333.0,521.0,524.0: ,
383.0,408.0,514.0,525.0: \,-'~
435.0,436.0,515.0,518.0: ‘
875.0,895.0,515.0,526.0: ff
923.0,929.0,518.0,523.0: «
956.0,980.0,516.0,526.0: e,
350.0,368.0,531.0,547.0: o
434.0,442.0,529.0,546.0: a
593.0,605.0,532.0,544.0: a
871.0,936.0,524.0,545.0: ?ux
966.0,986.0,525.0,538.0: U
394.0,418.0,576.0,603.0: ‘f
461.0,463.0,581.0,583.0: '
877.0,890.0,570.0,602.0: 1L
926.0,928.0,587.0,589.0: -
948.0,969.0,574.0,599.0: Ian
0.0,238.0,601.0,618.0: _
406.0,433.0,600.0,618.0: ;\
460.0,468.0,598.0,614.0: ‘~
974.0,1018.0,606.0,616.0: ')~
我认为这张图片很容易识别(黑底白字)。