3

我正在使用 JAXB2 在 Spring-WS 中执行 OXM。我指定的 XSD 需要将一个大型 XML 文件附加到 soap 消息中,因此我使用 MTOM 传输文件并在我的 JAXB2Marshaller 上启用了 MTOM。

当 JAXB2 编组预期 MIME 类型为 text/xml 的 MTOM 附件时,它会将该元素作为 javax.xml.transform.Source 对象提供。经过一番搜索,我找到了将该 Source 对象写入文件的方法。

// Fetch the MTOM attachment; the surrounding text says JAXB2 hands it over as a Source.
final Source source = request.getSource();
StreamSource streamSource = (StreamSource) source;
TransformerFactory factory = TransformerFactory.newInstance();
// newTransformer() without a stylesheet yields an identity transformer, so the
// transform below copies the source to the result.
Transformer transformer = factory.newTransformer();
File file = new File ("/tempxmlfile.xml");
try {
    transformer.transform(streamSource, new StreamResult(file));
    LOG.info("File saved in "+file.getAbsolutePath());
} catch (Exception ex) {
    // Report the failure together with its cause instead of discarding it; the
    // original called ex.getMessage() and ignored the result, hiding every error.
    // NOTE(review): assumes LOG offers error(String, Throwable) - confirm the logger API.
    LOG.error("Could not save attachment to " + file.getAbsolutePath(), ex);
}

我遇到的问题是,当我将 UTF-8 编码文件作为附件发送时,出现以下错误:

[Fatal Error] :1:1: Content is not allowed in prolog.
ERROR:  'Content is not allowed in prolog.'

这是由文件中编码文本前面的字节顺序标记(BOM)引起的。尽管 UTF-8 编码的文件并不需要 BOM,但 Unicode 标准允许使用它,而 Java 不支持带 BOM 的 UTF-8 编码流。

我可以通过发送没有 BOM 的文件来解决这个问题,但这并不可行,因为它会导致大多数插入 BOM 的 Microsoft 产品出现问题。

针对 Sun/Oracle 拒绝修复这一与流(Stream)相关的问题,已有很多变通方法,但它们都要求您能够访问底层的流;而 JAXB2 提供的 Source 对象没有 InputStream,只有一个 Reader 对象。有没有办法解决这个问题:要么用一个知道如何忽略 UTF-8 编码中 BOM 的 Reader 来包装 Source 的 Reader 对象,要么更改 JAXB2 将附件读入 Source 的方式,使其能够忽略 UTF-8 编码的 BOM?

在此先感谢,克雷格

4

1 回答 1

3

诀窍是“标记”阅读器。如果您的阅读器不支持标记,您可以将其包装在 BufferedReader 中:

选项 #1 - 检查 BOM 并将其删除

我相信我的原始代码错误地编写了 BOM。下面的源代码更有意义:

import java.io.*;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

/**
 * Demonstrates stripping a byte order mark (BOM) from a {@link Reader} before the
 * content is handed to an XML {@link Transformer}.
 *
 * <p>A Reader delivers decoded characters, not bytes, so a BOM can show up in two
 * shapes: as the single character U+FEFF when the bytes were decoded correctly, or
 * as one char per raw BOM byte when they were decoded with a single-byte charset
 * such as ISO-8859-1. Both shapes are recognised.
 */
public class Demo {

    // Raw BOM byte sequences, one byte value per char.
    private static final char[] UTF32BE = {0x00, 0x00, 0xFE, 0xFF};
    private static final char[] UTF32LE = {0xFF, 0xFE, 0x00, 0x00};
    private static final char[] UTF16BE = {0xFE, 0xFF};
    private static final char[] UTF16LE = {0xFF, 0xFE};
    private static final char[] UTF8 = {0xEF, 0xBB, 0xBF};

    // The BOM as a single decoded character (U+FEFF).
    private static final char[] DECODED_BOM = {0xFEFF};

    /**
     * Writes {@code bom.xml} (UTF-8 BOM followed by {@code <root/>}), reads it back
     * through a Reader, strips the BOM and copies the document to standard output.
     *
     * @param args unused
     * @throws Exception if the file cannot be written or read, or the transform fails
     */
    public static void main(String[] args) throws Exception {
        // Create an XML document with a BOM that matches its UTF-8 content
        OutputStream fos = new FileOutputStream("bom.xml");
        try {
            writeBOM(fos, UTF8);

            Writer oswUTF8 = new OutputStreamWriter(fos, "UTF-8");
            oswUTF8.write("<root/>");
            oswUTF8.flush();
        } finally {
            fos.close();
        }

        // Create a Source based on a Reader to simulate request.getSource().
        // The charset is explicit so the demo does not depend on the platform default.
        StreamSource attachment = new StreamSource(
                new InputStreamReader(new FileInputStream(new File("bom.xml")), "UTF-8"));

        // Wrap reader in BufferedReader so it will support marking
        Reader reader = new BufferedReader(attachment.getReader());
        try {
            // Remove the BOM
            removeBOM(reader);

            TransformerFactory tf = TransformerFactory.newInstance();
            Transformer t = tf.newTransformer();
            t.transform(new StreamSource(reader), new StreamResult(System.out));
        } finally {
            reader.close();
        }
    }

    /**
     * Writes the given BOM to the stream, one byte per array element.
     *
     * @param os  destination stream
     * @param bom BOM whose elements each hold a single byte value; only the low
     *            eight bits of every element are written
     * @throws IOException if writing fails
     */
    private static void writeBOM(OutputStream os, char[] bom) throws IOException {
        for (int x = 0; x < bom.length; x++) {
            os.write((byte) bom[x]);
        }
    }

    /**
     * Consumes a leading BOM from the reader if one is present; otherwise leaves
     * the reader positioned where it was.
     *
     * @param reader reader that supports {@code mark}/{@code reset}
     * @throws IOException if reading, marking or resetting fails
     */
    private static void removeBOM(Reader reader) throws IOException {
        // UTF32LE must be tried before UTF16LE because the latter is a prefix of it.
        char[][] candidates = {DECODED_BOM, UTF32BE, UTF32LE, UTF16BE, UTF16LE, UTF8};
        for (int i = 0; i < candidates.length; i++) {
            if (removeBOM(reader, candidates[i])) {
                return;
            }
        }
    }

    /**
     * Consumes {@code bom} from the reader when the next characters match it exactly.
     *
     * @param reader reader that supports {@code mark}/{@code reset}
     * @param bom    character sequence to look for
     * @return {@code true} if the sequence was found and consumed; {@code false} if
     *         not, in which case the reader has been reset to its previous position
     * @throws IOException if reading, marking or resetting fails
     */
    private static boolean removeBOM(Reader reader, char[] bom) throws IOException {
        int bomLength = bom.length;
        reader.mark(bomLength);
        char[] possibleBOM = new char[bomLength];

        // A single read may return fewer chars than requested; keep going until
        // the buffer is full or the stream ends.
        int filled = 0;
        while (filled < bomLength) {
            int count = reader.read(possibleBOM, filled, bomLength - filled);
            if (count < 0) {
                break;
            }
            filled += count;
        }

        // A stream shorter than the BOM cannot contain it.
        boolean matches = filled == bomLength;
        for (int x = 0; matches && x < bomLength; x++) {
            matches = bom[x] == possibleBOM[x];
        }
        if (!matches) {
            reader.reset();
        }
        return matches;
    }

}

选项 #2 - 找到“<”并将阅读器提前到该点

利用 mark/reset(标记/重置)逐个字符读取,直到遇到 '<' 为止:

import java.io.*;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

/**
 * Demonstrates skipping everything in front of the first {@code '<'} of a
 * {@link Reader} before the content is handed to an XML {@link Transformer}.
 *
 * <p>This discards any leading characters, not only a byte order mark.
 */
public class Demo2 {

    // Raw BOM byte sequences, one byte value per char.
    private static final char[] UTF32BE = {0x00, 0x00, 0xFE, 0xFF};
    private static final char[] UTF32LE = {0xFF, 0xFE, 0x00, 0x00};
    private static final char[] UTF16BE = {0xFE, 0xFF};
    private static final char[] UTF16LE = {0xFF, 0xFE};
    private static final char[] UTF8 = {0xEF, 0xBB, 0xBF};

    /**
     * Writes {@code bom.xml} (UTF-8 BOM followed by {@code <root/>}), reads it back
     * through a Reader, skips to the first {@code '<'} and copies the document to
     * standard output.
     *
     * @param args unused
     * @throws Exception if the file cannot be written or read, or the transform fails
     */
    public static void main(String[] args) throws Exception {
        // Create an XML document with a BOM that matches its UTF-8 content
        OutputStream fos = new FileOutputStream("bom.xml");
        try {
            writeBOM(fos, UTF8);

            Writer oswUTF8 = new OutputStreamWriter(fos, "UTF-8");
            oswUTF8.write("<root/>");
            oswUTF8.flush();
        } finally {
            fos.close();
        }

        // Create a Source based on a Reader to simulate request.getSource().
        // The charset is explicit so the demo does not depend on the platform default.
        StreamSource attachment = new StreamSource(
                new InputStreamReader(new FileInputStream(new File("bom.xml")), "UTF-8"));

        // Wrap reader in BufferedReader so it will support marking
        Reader reader = new BufferedReader(attachment.getReader());
        try {
            // Remove the BOM
            removeBOM(reader);

            TransformerFactory tf = TransformerFactory.newInstance();
            Transformer t = tf.newTransformer();
            t.transform(new StreamSource(reader), new StreamResult(System.out));
        } finally {
            reader.close();
        }
    }

    /**
     * Writes the given BOM to the stream, one byte per array element.
     *
     * @param os  destination stream
     * @param bom BOM whose elements each hold a single byte value; only the low
     *            eight bits of every element are written
     * @throws IOException if writing fails
     */
    private static void writeBOM(OutputStream os, char[] bom) throws IOException {
        for (int x = 0; x < bom.length; x++) {
            os.write((byte) bom[x]);
        }
    }

    /**
     * Advances the reader so that the next character read is the first
     * {@code '<'} in the stream. If the stream contains no {@code '<'}, the
     * reader is left at end of stream.
     *
     * <p>Implemented as a loop: the previous recursive form used one stack frame
     * per skipped character and never terminated on input without {@code '<'}.
     *
     * @param reader reader that supports {@code mark}/{@code reset}
     * @return the same reader instance
     * @throws IOException if reading, marking or resetting fails
     */
    private static Reader removeBOM(Reader reader) throws IOException {
        while (true) {
            reader.mark(1);
            int next = reader.read();
            if (next < 0) {
                // End of stream reached without finding '<'.
                return reader;
            }
            if (next == '<') {
                // Un-read the '<' so the parser sees it.
                reader.reset();
                return reader;
            }
        }
    }

}
于 2011-01-18T15:49:53.250 回答