我正在尝试运行由 Stefano Chizzolini(PDF Clown 的作者,非常厉害的一位开发者)编写的 Java 代码,以便使用 PDF Clown 库解析 PDF。运行时我遇到了下面这个错误,不知道该如何解决。
Exception in thread "main" org.pdfclown.util.parsers.ParseException: 'name' table does NOT exist.
at org.pdfclown.documents.contents.fonts.OpenFontParser.getName(OpenFontParser.java:570)
at org.pdfclown.documents.contents.fonts.OpenFontParser.load(OpenFontParser.java:221)
at org.pdfclown.documents.contents.fonts.OpenFontParser.<init>(OpenFontParser.java:205)
at org.pdfclown.documents.contents.fonts.TrueTypeFont.loadEncoding(TrueTypeFont.java:91)
at org.pdfclown.documents.contents.fonts.SimpleFont.onLoad(SimpleFont.java:118)
at org.pdfclown.documents.contents.fonts.Font.load(Font.java:738)
at org.pdfclown.documents.contents.fonts.Font.<init>(Font.java:351)
at org.pdfclown.documents.contents.fonts.SimpleFont.<init>(SimpleFont.java:62)
at org.pdfclown.documents.contents.fonts.TrueTypeFont.<init>(TrueTypeFont.java:68)
at org.pdfclown.documents.contents.fonts.Font.wrap(Font.java:253)
at org.pdfclown.documents.contents.FontResources.wrap(FontResources.java:72)
at org.pdfclown.documents.contents.FontResources.wrap(FontResources.java:1)
at org.pdfclown.documents.contents.ResourceItems.get(ResourceItems.java:119)
at org.pdfclown.documents.contents.objects.SetFont.getResource(SetFont.java:119)
at org.pdfclown.documents.contents.objects.SetFont.getFont(SetFont.java:83)
at org.pdfclown.documents.contents.objects.SetFont.scan(SetFont.java:97)
at org.pdfclown.documents.contents.ContentScanner.moveNext(ContentScanner.java:1330)
at org.pdfclown.tools.TextExtractor.extract(TextExtractor.java:626)
at org.pdfclown.tools.TextExtractor.extract(TextExtractor.java:296)
at PDFReader.FullExtract.run(FullExtract.java:71)
at PDFReader.FullExtract.main(FullExtract.java:142)
我知道这个错误是由库中的 OpenFontParser 类抛出的。有什么办法可以解决这个问题吗?
这段代码对大多数 PDF 都能正常工作,只有一个 PDF 无法解析。我猜原因是该 PDF 中包含下面这个符号。
/**
  Sample that opens a PDF file and prints the text shown by each page's
  content stream to standard output.
*/
public class PDFReader extends Sample {
  /**
    Opens the PDF at the hard-coded path, walks every page and prints the
    decoded text chunks found in it. The file is always closed afterwards,
    including when the extraction fails.

    @throws RuntimeException if the file cannot be opened.
  */
  @Override
  public void run()
  {
    String filePath = "C:\\Users\\XYZ\\Desktop\\SomeSamplePDF.pdf";

    // 1. Open the PDF file!
    File file;
    try
    {file = new File(filePath);}
    catch(Exception e)
    {throw new RuntimeException(filePath + " file access error.",e);}

    try
    {
      // 2. Get the PDF document!
      Document document = file.getDocument();

      // 3. Extracting text from the document pages...
      for(Page page : document.getPages())
      {
        extract(new ContentScanner(page)); // Wraps the page contents into a scanner.
      }
    }
    finally
    {
      // Release the file even when the extraction above throws.
      close(file);
    }
  }

  /**
    Closes the given PDF file, releasing its underlying resources.
    <p>A failure to close is reported on standard error rather than rethrown,
    so that it cannot mask an exception raised during the extraction.</p>

    @param file File to close; ignored when {@code null}.
  */
  private void close(File file)
  {
    if(file == null)
      return;

    try
    {file.close();}
    catch(Exception e)
    {System.err.println("Unable to close the PDF file: " + e);}
  }

  /**
    Scans a content level looking for text.
    <p>Each text-showing operation is decoded through the font of the current
    graphics state and printed on its own line; container objects are scanned
    recursively.</p>

    @param level Scanner positioned on the content level to inspect; nothing
      happens when it is {@code null}.
  */
  /*
    NOTE: Page contents are represented by a sequence of content objects,
    possibly nested into multiple levels.
  */
  private void extract(
    ContentScanner level
    )
  {
    if(level == null)
      return;

    // NOTE(review): moveNext() appears to resolve font resources as it scans,
    // so a malformed embedded font may surface here as a runtime exception --
    // confirm against the library before deciding how to handle it.
    while(level.moveNext())
    {
      ContentObject content = level.getCurrent();
      if(content instanceof ShowText)
      {
        Font font = level.getState().getFont();
        // Extract the current text chunk, decoding it!
        System.out.println(font.decode(((ShowText)content).getText()));
      }
      else if(content instanceof Text
        || content instanceof ContainerObject)
      {
        // Scan the inner level!
        extract(level.getChildLevel());
      }
    }
  }

  /**
    Announces the page about to be scanned; for every page after the first
    one, the user is asked beforehand whether to go on.
    <p>This method is not currently called within this class.</p>

    @param page Page about to be scanned.
    @return {@code false} if the user chose anything other than the empty
      ("Scan next page") option; {@code true} otherwise.
  */
  private boolean prompt(Page page)
  {
    int pageIndex = page.getIndex();
    if(pageIndex > 0)
    {
      Map<String,String> options = new HashMap<String,String>();
      options.put("", "Scan next page");
      options.put("Q", "End scanning");
      if(!promptChoice(options).equals(""))
        return false;
    }

    System.out.println("\nScanning page " + (pageIndex+1) + "...\n");
    return true;
  }

  /**
    Program entry point: runs the sample.

    @param args Command-line arguments (unused).
  */
  public static void main(String args[])
  {
    new PDFReader().run();
  }
}