根据需求,我正在尝试使用 Apache Tika 将 doc 或 docx(Microsoft Word)文件转换为 HTML 格式。
我最终得到以下代码,它工作正常,但它没有向结果 html 添加任何样式表。
import javax.xml.transform.OutputKeys;
import java.io.*;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.detect.DefaultDetector;
/**
 * Command-line example that parses a Word document with Apache Tika and
 * prints the resulting HTML to standard output.
 *
 * <p>The SAX events emitted by the Tika parser are fed into a JAXP
 * {@link TransformerHandler} configured for indented HTML output, and the
 * serialized markup is collected in memory before being printed.
 */
public class DocxConvert
{
    /**
     * Parses {@code f:\file.doc} and writes the generated HTML to
     * {@code System.out}. Any failure is reported by printing its stack trace.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args)
    {
        try
        {
            // Serializer: SAX events in, indented HTML text out (buffered in sw).
            StringWriter sw = new StringWriter();
            SAXTransformerFactory factory = (SAXTransformerFactory)
                    SAXTransformerFactory.newInstance();
            TransformerHandler handler = factory.newTransformerHandler();
            handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html");
            handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
            handler.setResult(new StreamResult(sw));

            DefaultDetector detector = new DefaultDetector();
            Metadata metadata = new Metadata();
            org.apache.tika.parser.Parser parser = new AutoDetectParser(detector);

            // try-with-resources closes the stream on every path and, unlike a
            // manual finally block, cannot hit a NullPointerException when the
            // file fails to open.
            try (InputStream input = new FileInputStream("f:\\file.doc"))
            {
                parser.parse(input, handler, metadata, new ParseContext());
            }

            System.out.print(sw.toString());
        }
        catch (Exception ex)
        {
            ex.printStackTrace();
        }
    }
}
有没有办法在输出的 HTML 中添加或生成样式表?请帮忙!