我遇到了 POI 问题,需要一些帮助。
我有一本个人日记,已经坚持写了多年:每个月把每天的条目写进一个 Word .doc 文件,并按年份存放在对应的文件夹中。我给每个文件都设置了打开密码,所以它们都是加密的。
我想使用 Lucene 来索引整个集合,以便更好地搜索(例如,“我最后一次在哪一天和哪一年写下我有多喜欢燕麦片?”)。
第一步是使用 POI 读取 Word .doc 文件,但我在这一步就卡住了,因为它无法读取我的加密文件。
我写了这个类:
package model;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.DocumentProperties;
import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.dev.POIFSLister;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.security.GeneralSecurityException;
import java.util.LinkedList;
import java.util.List;
/**
 * Entry point of the journal project: opens a password-protected Word document through
 * Apache POI so that its entries can later be parsed, indexed, and searched.
 *
 * @author Michael
 * @since 8/19/12 3:48 PM
 */
public class JournalReader {
    public static final Log LOGGER = LogFactory.getLog(JournalReader.class);
    public static final String DEFAULT_PASSWORD = "journal";

    /**
     * Diagnostic helper: when at least one argument is given, lists the POIFS structure of
     * the file named by the first argument.
     *
     * @param args command-line arguments; {@code args[0]} is the path of the file to list
     */
    public static void main(String[] args) {
        if (args.length > 0) {
            try {
                POIFSLister.viewFile(args[0], true);
            } catch (IOException e) {
                // Report through the class logger instead of dumping the trace to stderr.
                LOGGER.error("Unable to list POIFS contents of " + args[0], e);
            }
        }
    }

    /**
     * Opens the given journal document, decrypts it with {@link #DEFAULT_PASSWORD}, and
     * loads it as a Word document.
     *
     * <p>Entry extraction is not implemented yet, so the returned list is currently always
     * empty.
     *
     * @param journalFile the Word document to read; {@code null} yields an empty list
     * @return the journal entries found in the document, never {@code null}
     * @throws IOException if the file cannot be opened or read
     * @throws GeneralSecurityException if decryption fails or the password is rejected
     */
    public List<JournalEntry> readEntries(File journalFile) throws IOException, GeneralSecurityException {
        List<JournalEntry> journalEntries = new LinkedList<JournalEntry>();
        if (journalFile != null) {
            FileInputStream journalStream = new FileInputStream(journalFile);
            try {
                POIFSFileSystem fs = new POIFSFileSystem(journalStream);
                // NOTE(review): the reported failure (FileNotFoundException: no such entry:
                // "EncryptionInfo") is raised by this constructor, which looks up an entry of
                // that name in the file system root. Presumably only OOXML-style encrypted
                // packages contain that entry and a legacy binary .doc does not - confirm
                // against the POI encryption documentation.
                EncryptionInfo info = new EncryptionInfo(fs);
                Decryptor decryptor = Decryptor.getInstance(info);
                // verifyPassword reports a mismatch through its return value; fail here
                // rather than letting a later call fail in a less obvious way.
                if (!decryptor.verifyPassword(DEFAULT_PASSWORD)) {
                    throw new GeneralSecurityException(
                            "Password rejected for journal file " + journalFile.getPath());
                }
                HWPFDocument journalDocument = new HWPFDocument(decryptor.getDataStream(fs));
                // TODO: extract the entries from journalDocument and add them to journalEntries.
            } finally {
                // Release the file handle on every path, including failures above.
                journalStream.close();
            }
        }
        return journalEntries;
    }
}
我有一个 JUnit 测试来尝试一下:
import model.JournalEntry;
import model.JournalReader;
import org.junit.Assert;
import org.junit.Test;
import java.io.File;
import java.io.IOException;
import java.security.GeneralSecurityException;
import java.util.List;
/**
 * JUnit test exercising {@link JournalReader} against a sample journal document.
 *
 * @author Michael
 * @since 8/19/12 8:46 PM
 */
public class JournalReaderTest {
    /**
     * Reads one journal document from disk and expects a non-null, non-empty list of entries.
     *
     * @throws IOException if the document cannot be read
     * @throws GeneralSecurityException if the document cannot be decrypted
     */
    @Test
    public void testReadEntries() throws IOException, GeneralSecurityException {
        File augustJournal =
                new File("C:\\Users\\Michael\\Documents\\Stuff To Back Up\\Journal\\1994\\AUG94.doc");
        List<JournalEntry> parsedEntries = new JournalReader().readEntries(augustJournal);
        Assert.assertNotNull(parsedEntries);
        Assert.assertTrue(!parsedEntries.isEmpty());
    }
}
当我运行 JUnit 测试时,我得到了这个堆栈跟踪:
"C:\Program Files\Java\jdk1.7.0_02\bin\java" -ea -Didea.launcher.port=7540 "-Didea.launcher.bin.path=C:\Program Files (x86)\JetBrains\IntelliJ IDEA 122.29\bin" -Dfile.encoding=UTF-8 -classpath "C:\Program Files (x86)\JetBrains\IntelliJ IDEA 122.29\lib\idea_rt.jar;C:\Program Files (x86)\JetBrains\IntelliJ IDEA 122.29\plugins\junit\lib\junit-rt.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\alt-rt.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\charsets.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\deploy.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\javaws.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\jce.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\jsse.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\management-agent.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\plugin.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\resources.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\rt.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\ext\dnsns.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\ext\localedata.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\ext\sunec.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\ext\sunjce_provider.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\ext\sunmscapi.jar;C:\Program 
Files\Java\jdk1.7.0_02\jre\lib\ext\zipfs.jar;F:\Projects\Java\diary-index\out\test\diary-index;F:\Projects\Java\diary-index\out\production\diary-index;F:\Projects\Java\diary-index\lib\commons-lang3-3.1.jar;F:\Projects\Java\diary-index\lib\log4j-1.2.16.jar;F:\Projects\Java\diary-index\lib\commons-io-2.3.jar;F:\Projects\Java\diary-index\lib\poi-scratchpad-3.8-20120326.jar;F:\Projects\Java\diary-index\lib\poi-3.8-20120326.jar;F:\Projects\Java\diary-index\lib\poi-examples-3.8-20120326.jar;F:\Projects\Java\diary-index\lib\poi-excelant-3.8-20120326.jar;F:\Projects\Java\diary-index\lib\poi-ooxml-3.8-20120326.jar;F:\Projects\Java\diary-index\lib\poi-ooxml-schemas-3.8-20120326.jar;F:\Projects\Java\diary-index\lib\dom4j-1.6.1.jar;F:\Projects\Java\diary-index\lib\stax-api-1.0.1.jar;F:\Projects\Java\diary-index\lib\xmlbeans-2.3.0.jar;F:\Projects\Java\diary-index\lib\antlr-2.7.7.jar;F:\Projects\Java\diary-index\lib\antlr-runtime-3.3.jar;F:\Projects\Java\diary-index\lib\avalon-framework-api-4.3.1.jar;F:\Projects\Java\diary-index\lib\avalon-framework-impl-4.3.1.jar;F:\Projects\Java\diary-index\lib\commons-codec-1.3.jar;F:\Projects\Java\diary-index\lib\commons-io-1.3.1.jar;F:\Projects\Java\diary-index\lib\commons-lang-2.4.jar;F:\Projects\Java\diary-index\lib\commons-logging-1.1.1.jar;F:\Projects\Java\diary-index\lib\docx4j-2.8.0.jar;F:\Projects\Java\diary-index\lib\fop-1.0.jar;F:\Projects\Java\diary-index\lib\itext-2.1.7.jar;F:\Projects\Java\diary-index\lib\jaxb-svg11-1.0.2.jar;F:\Projects\Java\diary-index\lib\jaxb-xmldsig-core-1.0.0.jar;F:\Projects\Java\diary-index\lib\jaxb-xslfo-1.0.1.jar;F:\Projects\Java\diary-index\lib\log4j-1.2.15.jar;F:\Projects\Java\diary-index\lib\poi-3.8.jar;F:\Projects\Java\diary-index\lib\poi-scratchpad-3.8.jar;F:\Projects\Java\diary-index\lib\serializer-2.7.1.jar;F:\Projects\Java\diary-index\lib\stringtemplate-3.2.1.jar;F:\Projects\Java\diary-index\lib\wmf2svg-0.9.0.jar;F:\Projects\Java\diary-index\lib\xalan-2.7.1.jar;F:\Projects\Java\diary-index\lib\xh
tmlrenderer-1.0.0.jar;F:\Projects\Java\diary-index\lib\xml-apis-1.3.04.jar;F:\Projects\Java\diary-index\lib\xmlgraphics-commons-1.4.jar;F:\Projects\Java\diary-index\test-lib\junit-4.10.jar" com.intellij.rt.execution.application.AppMain com.intellij.rt.execution.junit.JUnitStarter -ideVersion5 JournalReaderTest
log4j: reset attribute= "false".
log4j: Threshold ="null".
log4j: Level value for root is [debug].
log4j: root level set to DEBUG
log4j: Class name: [org.apache.log4j.ConsoleAppender]
log4j: Parsing layout of class: "org.apache.log4j.PatternLayout"
log4j: Setting property [conversionPattern] to [%d{dd MMM yyyy HH:mm:ss} %5p %c{1} - %m%n].
log4j: Adding appender named [consoleAppender] to category [root].
java.io.FileNotFoundException: no such entry: "EncryptionInfo"
at org.apache.poi.poifs.filesystem.DirectoryNode.getEntry(DirectoryNode.java:375)
at org.apache.poi.poifs.filesystem.DirectoryNode.createDocumentInputStream(DirectoryNode.java:177)
at org.apache.poi.poifs.crypt.EncryptionInfo.<init>(EncryptionInfo.java:45)
at org.apache.poi.poifs.crypt.EncryptionInfo.<init>(EncryptionInfo.java:39)
at model.JournalReader.readEntries(JournalReader.java:43)
at JournalReaderTest.testReadEntries(JournalReaderTest.java:24)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:45)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:15)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:42)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:20)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:263)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:68)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:47)
at org.junit.runners.ParentRunner$3.run(ParentRunner.java:231)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:60)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:229)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:50)
at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:222)
at org.junit.runners.ParentRunner.run(ParentRunner.java:300)
at org.junit.runner.JUnitCore.run(JUnitCore.java:157)
at com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:76)
at com.intellij.rt.execution.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:195)
at com.intellij.rt.execution.junit.JUnitStarter.main(JUnitStarter.java:63)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:120)
Process finished with exit code -1
POI 的文档和 javadoc 都没能帮上忙。我非常沮丧,以至于在考虑改用 docx4j。这意味着要把我所有的 .doc 文件转换为 .docx(当然,是在做好备份之后)。
我想知道是否有人成功使用 docx4j 读取过加密的、受密码保护的文件。有人做到过吗?我只想得到一个肯定的答案,告诉我值得继续努力。
如果有人能看到我在 POI 上做错了什么,我也会很高兴知道这一点。谢谢。