-3

我遇到了 POI 问题,需要一些帮助。

我有一份写了很多年的个人日记:每个月的每日条目都写在一个 Word .doc 文件里,并按年份存放在各自的文件夹中。我给每个文件都设置了打开密码,所以它们都是加密的。

我想使用 Lucene 来索引整个集合,以便更好地搜索(例如,“我最后一次写下我有多喜欢燕麦片是在哪一年的哪一天?”)。

第一步是使用 POI 读取 Word .doc 文件,但我卡在了这一步,因为它无法读取我的加密文件。

我写了这个类:

package model;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.DocumentProperties;
import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.dev.POIFSLister;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.security.GeneralSecurityException;
import java.util.LinkedList;
import java.util.List;

/**
 * Reads password-protected Word journal files with Apache POI, as the first step
 * towards parsing, indexing, and searching the journal.
 *
 * @author Michael
 * @since 8/19/12 3:48 PM
 */
public class JournalReader {
    public static final Log LOGGER = LogFactory.getLog(JournalReader.class);
    /** Password used when attempting to decrypt a journal file. */
    public static final String DEFAULT_PASSWORD = "journal";

    /**
     * Passes the first command-line argument, if any, to
     * {@link POIFSLister#viewFile(String, boolean)}; does nothing without arguments.
     *
     * @param args command-line arguments; only {@code args[0]} (a file name) is used
     */
    public static void main(String[] args) {
        if (args.length > 0) {
            try {
                POIFSLister.viewFile(args[0], true);
            } catch (IOException e) {
                // Report through the class logger instead of printing the raw stack trace.
                LOGGER.error("Could not view POIFS file " + args[0], e);
            }
        }
    }

    /**
     * Opens the given journal file, verifies {@link #DEFAULT_PASSWORD} against it and
     * parses the decrypted content as a Word document.
     * <p>
     * Entry extraction is not implemented yet, so the returned list is currently
     * always empty.
     * <p>
     * NOTE(review): the "EncryptionInfo" stream read below appears to be part of the
     * OOXML (.docx) encryption container; a legacy binary .doc file may not contain it,
     * in which case POI fails with "no such entry: EncryptionInfo" -- confirm against
     * the POI encryption documentation.
     *
     * @param journalFile the journal document to read; {@code null} yields an empty list
     * @return a mutable list of journal entries, never {@code null}
     * @throws IOException              if the file cannot be opened or parsed
     * @throws GeneralSecurityException if decryption fails or the password is rejected
     */
    public List<JournalEntry> readEntries(File journalFile) throws IOException, GeneralSecurityException {
        List<JournalEntry> journalEntries = new LinkedList<JournalEntry>();
        if (journalFile == null) {
            return journalEntries;
        }
        FileInputStream journalStream = new FileInputStream(journalFile);
        try {
            POIFSFileSystem fs = new POIFSFileSystem(journalStream);
            EncryptionInfo info = new EncryptionInfo(fs);
            Decryptor decryptor = Decryptor.getInstance(info);
            // verifyPassword reports a wrong password through its return value, so it
            // must be checked before asking for the decrypted stream.
            if (!decryptor.verifyPassword(DEFAULT_PASSWORD)) {
                throw new GeneralSecurityException(
                        "Password was rejected for journal file " + journalFile);
            }
            HWPFDocument journalDocument = new HWPFDocument(decryptor.getDataStream(fs));
            // TODO: turn the content of journalDocument into JournalEntry objects.
        } finally {
            // Defensive: release the file handle here regardless of how parsing ended.
            journalStream.close();
        }
        return journalEntries;
    }
}

我有一个 JUnit 测试来尝试一下:

import model.JournalEntry;
import model.JournalReader;
import org.junit.Assert;
import org.junit.Test;

import java.io.File;
import java.io.IOException;
import java.security.GeneralSecurityException;
import java.util.List;

/**
 * JUnit test that runs {@link JournalReader#readEntries(File)} against a single
 * journal document on the local disk.
 *
 * @author Michael
 * @since 8/19/12 8:46 PM
 */
public class JournalReaderTest {

    /** Absolute location of the journal document the test reads. */
    private static final String JOURNAL_FILE_PATH =
            "C:\\Users\\Michael\\Documents\\Stuff To Back Up\\Journal\\1994\\AUG94.doc";

    /**
     * Expects a non-null, non-empty list of entries from the sample journal file.
     */
    @Test
    public void testReadEntries() throws IOException, GeneralSecurityException {
        File sampleJournal = new File(JOURNAL_FILE_PATH);
        List<JournalEntry> entries = new JournalReader().readEntries(sampleJournal);
        Assert.assertNotNull(entries);
        Assert.assertFalse(entries.isEmpty());
    }
}

当我运行 JUnit 测试时,我得到了这个堆栈跟踪:

"C:\Program Files\Java\jdk1.7.0_02\bin\java" -ea -Didea.launcher.port=7540 "-Didea.launcher.bin.path=C:\Program Files (x86)\JetBrains\IntelliJ IDEA 122.29\bin" -Dfile.encoding=UTF-8 -classpath "C:\Program Files (x86)\JetBrains\IntelliJ IDEA 122.29\lib\idea_rt.jar;C:\Program Files (x86)\JetBrains\IntelliJ IDEA 122.29\plugins\junit\lib\junit-rt.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\alt-rt.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\charsets.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\deploy.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\javaws.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\jce.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\jsse.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\management-agent.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\plugin.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\resources.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\rt.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\ext\dnsns.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\ext\localedata.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\ext\sunec.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\ext\sunjce_provider.jar;C:\Program Files\Java\jdk1.7.0_02\jre\lib\ext\sunmscapi.jar;C:\Program 
Files\Java\jdk1.7.0_02\jre\lib\ext\zipfs.jar;F:\Projects\Java\diary-index\out\test\diary-index;F:\Projects\Java\diary-index\out\production\diary-index;F:\Projects\Java\diary-index\lib\commons-lang3-3.1.jar;F:\Projects\Java\diary-index\lib\log4j-1.2.16.jar;F:\Projects\Java\diary-index\lib\commons-io-2.3.jar;F:\Projects\Java\diary-index\lib\poi-scratchpad-3.8-20120326.jar;F:\Projects\Java\diary-index\lib\poi-3.8-20120326.jar;F:\Projects\Java\diary-index\lib\poi-examples-3.8-20120326.jar;F:\Projects\Java\diary-index\lib\poi-excelant-3.8-20120326.jar;F:\Projects\Java\diary-index\lib\poi-ooxml-3.8-20120326.jar;F:\Projects\Java\diary-index\lib\poi-ooxml-schemas-3.8-20120326.jar;F:\Projects\Java\diary-index\lib\dom4j-1.6.1.jar;F:\Projects\Java\diary-index\lib\stax-api-1.0.1.jar;F:\Projects\Java\diary-index\lib\xmlbeans-2.3.0.jar;F:\Projects\Java\diary-index\lib\antlr-2.7.7.jar;F:\Projects\Java\diary-index\lib\antlr-runtime-3.3.jar;F:\Projects\Java\diary-index\lib\avalon-framework-api-4.3.1.jar;F:\Projects\Java\diary-index\lib\avalon-framework-impl-4.3.1.jar;F:\Projects\Java\diary-index\lib\commons-codec-1.3.jar;F:\Projects\Java\diary-index\lib\commons-io-1.3.1.jar;F:\Projects\Java\diary-index\lib\commons-lang-2.4.jar;F:\Projects\Java\diary-index\lib\commons-logging-1.1.1.jar;F:\Projects\Java\diary-index\lib\docx4j-2.8.0.jar;F:\Projects\Java\diary-index\lib\fop-1.0.jar;F:\Projects\Java\diary-index\lib\itext-2.1.7.jar;F:\Projects\Java\diary-index\lib\jaxb-svg11-1.0.2.jar;F:\Projects\Java\diary-index\lib\jaxb-xmldsig-core-1.0.0.jar;F:\Projects\Java\diary-index\lib\jaxb-xslfo-1.0.1.jar;F:\Projects\Java\diary-index\lib\log4j-1.2.15.jar;F:\Projects\Java\diary-index\lib\poi-3.8.jar;F:\Projects\Java\diary-index\lib\poi-scratchpad-3.8.jar;F:\Projects\Java\diary-index\lib\serializer-2.7.1.jar;F:\Projects\Java\diary-index\lib\stringtemplate-3.2.1.jar;F:\Projects\Java\diary-index\lib\wmf2svg-0.9.0.jar;F:\Projects\Java\diary-index\lib\xalan-2.7.1.jar;F:\Projects\Java\diary-index\lib\xh
tmlrenderer-1.0.0.jar;F:\Projects\Java\diary-index\lib\xml-apis-1.3.04.jar;F:\Projects\Java\diary-index\lib\xmlgraphics-commons-1.4.jar;F:\Projects\Java\diary-index\test-lib\junit-4.10.jar" com.intellij.rt.execution.application.AppMain com.intellij.rt.execution.junit.JUnitStarter -ideVersion5 JournalReaderTest
log4j: reset attribute= "false".
log4j: Threshold ="null".
log4j: Level value for root is  [debug].
log4j: root level set to DEBUG
log4j: Class name: [org.apache.log4j.ConsoleAppender]
log4j: Parsing layout of class: "org.apache.log4j.PatternLayout"
log4j: Setting property [conversionPattern] to [%d{dd MMM yyyy HH:mm:ss} %5p %c{1} - %m%n].
log4j: Adding appender named [consoleAppender] to category [root].

java.io.FileNotFoundException: no such entry: "EncryptionInfo"
    at org.apache.poi.poifs.filesystem.DirectoryNode.getEntry(DirectoryNode.java:375)
    at org.apache.poi.poifs.filesystem.DirectoryNode.createDocumentInputStream(DirectoryNode.java:177)
    at org.apache.poi.poifs.crypt.EncryptionInfo.<init>(EncryptionInfo.java:45)
    at org.apache.poi.poifs.crypt.EncryptionInfo.<init>(EncryptionInfo.java:39)
    at model.JournalReader.readEntries(JournalReader.java:43)
    at JournalReaderTest.testReadEntries(JournalReaderTest.java:24)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:45)
    at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:15)
    at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:42)
    at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:20)
    at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:263)
    at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:68)
    at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:47)
    at org.junit.runners.ParentRunner$3.run(ParentRunner.java:231)
    at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:60)
    at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:229)
    at org.junit.runners.ParentRunner.access$000(ParentRunner.java:50)
    at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:222)
    at org.junit.runners.ParentRunner.run(ParentRunner.java:300)
    at org.junit.runner.JUnitCore.run(JUnitCore.java:157)
    at com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:76)
    at com.intellij.rt.execution.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:195)
    at com.intellij.rt.execution.junit.JUnitStarter.main(JUnitStarter.java:63)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at com.intellij.rt.execution.application.AppMain.main(AppMain.java:120)


Process finished with exit code -1

POI 的文档和 javadoc 都没帮上什么忙。我已经沮丧到开始考虑改用 docx4j 了。这意味着要把我所有的 .doc 文件转换为 .docx(当然,是在做好备份之后)。

我想知道是否有人成功使用 docx4j 读取加密的、受密码保护的文件。任何人?我只想得到一个肯定的答案,告诉我值得继续努力。

如果有人能看到我在 POI 上做错了什么,我也会很高兴知道这一点。谢谢。

4

1 回答 1

2

披露:我在 docx4j 上工作

docx4j 的 OpcPackage 包含:

/**
 * Convenience method to create a WordprocessingMLPackage
 * or PresentationMLPackage
 * from an existing File (.docx/.docm, .pptx or Flat OPC .xml).
 *
 * @param docxFile
 *            The docx file
 * @param password
 *            The password, if the file is password protected (compound)
 * @return the loaded package
 * @throws Docx4JException
 *             presumably when the file cannot be loaded (the method body is not shown here)
 * @since 2.8.0
 */ 
public static OpcPackage load(final java.io.File docxFile, String password) throws Docx4JException

它应该能处理受密码保护的文件。

我自己没怎么尝试过 docx 文件的加密/解密。

于 2012-08-23T22:28:30.850 回答