3

如何使用 org.apache.poi.hwpf.converter.WordToFoConverter 类将 .doc 文件转换为 XSL-FO(.fo)?我搜索过,但只找到了 Word 转 HTML 的示例。我也阅读了 Apache POI 网站上关于 WordToFoConverter 的文档,但还是没弄明白该怎么用。

使用 Apache POI 将 Word 转换为 HTML

我尝试使用以下代码将 .doc 转换为 .fo,但在用 Apache FOP 把生成的 .fo 文件转换为 .png 之后,Word 文件中原有的图片没有出现在输出里。

package word2fo;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;

import javax.swing.text.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.converter.WordToFoConverter;
import org.apache.poi.hwpf.converter.WordToFoUtils;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.converter.WordToHtmlUtils;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.w3c.dom.Node;

/**
 * Command-line utility that converts a legacy Word ({@code .doc}) file into
 * an XSL-FO document and prints the serialized XML to standard output.
 */
public class Doc2Fo{
    /**
     * Loads the Word file, converts it to an XSL-FO DOM tree, serializes the
     * tree as indented UTF-8 XML and prints it.
     *
     * @param args ignored; the input path is hard-coded
     * @throws Exception if the file cannot be read, the document cannot be
     *                   converted, or the DOM cannot be serialized
     */
    public static void main(String[] args) throws Exception {
        System.out.println("reached 1");
        org.w3c.dom.Document foDocument;
        // try-with-resources guarantees the input stream is closed even when
        // loading or conversion fails; the stream stays open for the whole
        // conversion so the converter never sees a closed source.
        try (FileInputStream wordInput = new FileInputStream("D:\\Magna.doc")) {
            HWPFDocumentCore wordDocument = WordToFoUtils.loadDoc(wordInput);
            System.out.println("reached 2");
            WordToFoConverter wordToFoConverter = new WordToFoConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder()
                            .newDocument());
            System.out.println("reached 3");
            // NOTE(review): no PicturesManager is configured on the converter;
            // presumably embedded images are therefore not written to the FO
            // output - confirm against the POI converter documentation.
            wordToFoConverter.processDocument(wordDocument);
            foDocument = wordToFoConverter.getDocument();
        }

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DOMSource domSource = new DOMSource(foDocument);
        StreamResult streamResult = new StreamResult(out);
        System.out.println("reached 4");

        // Failures are propagated (main declares "throws Exception") instead
        // of being printed and swallowed, so a failed run is not mistaken for
        // a successful one.
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.transform(domSource, streamResult);

        // Decode with the same charset the serializer wrote; the platform
        // default charset would corrupt non-ASCII text on non-UTF-8 systems.
        String result = out.toString("UTF-8");
        System.out.println(result);
    }
}
4

1 回答 1

2
    // Open the sample .doc through the POIDataSamples resource helper.
    HWPFDocument doc = new HWPFDocument(
            POIDataSamples.getDocumentInstance().openResourceAsStream(sampleFileName));

    // Run the Word-to-FO conversion into a freshly created, empty DOM document.
    WordToFoConverter converter = new WordToFoConverter(
            XMLHelper.getDocumentBuilderFactory().newDocumentBuilder().newDocument());
    converter.processDocument(doc);

    // Serialize the converter's DOM tree as indented XML into an in-memory writer.
    StringWriter foXml = new StringWriter();
    DOMSource source = new DOMSource(converter.getDocument());
    StreamResult target = new StreamResult(foXml);

    TransformerFactory factory = TransformerFactory.newInstance();
    Transformer serializer = factory.newTransformer();
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    serializer.transform(source, target);

    return foXml.toString();
于 2015-09-30T10:34:31.447 回答