9

我创建了一个 XML 模式 (foo.xsd),并使用 xjc 为 JAXB 生成了绑定类。假设根元素是 collection,我要写出 N 个 document 对象,它们都是复杂类型。

因为我打算写出大型 XML 文件,所以我使用 Stax 来写出 collection 根元素,并通过 Marshaller.marshal(JAXBElement, XMLEventWriter) 使用 JAXB 来编组各个 document 子树。这是 JAXB 非官方用户指南推荐的方法。

我的问题是,如何在编组时验证 XML?如果我将模式绑定到 JAXB 编组器(使用 Marshaller.setSchema()),由于我只编组了一个子树,就会出现验证错误(它抱怨没有看到 collection 根元素)。有没有办法将模式绑定到 Stax XMLEventWriter 或类似的东西?

对这种整体方法的任何评论都会有所帮助。基本上,我希望能够使用 JAXB 编组和解组大型 XML 文档而不会耗尽内存,所以如果有更好的方法来做到这一点,请告诉我。

4

2 回答 2

3

一些 Stax 实现似乎能够验证输出。请参阅以下类似问题的答案:

将 Stax2 与 Woodstox 一起使用

于 2010-03-20T23:02:04.737 回答
1

您可以让根集合变成惰性的:只有在 Marshaller 调用 Iterator.next() 时才实例化各个元素。这样,只需调用一次 marshal() 就能产生一个巨大的、经过验证的 XML。您不会耗尽内存,因为已经序列化的 bean 会被 GC 回收。

另外,如果需要有条件地跳过某个元素,也可以返回 null 作为集合元素,不会出现 NPE。

即使在巨大的 XML 上,XML 模式验证器本身似乎也消耗很少的内存。

请参阅 JAXB 的ArrayElementProperty.serializeListBody()

import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.List;

import javax.xml.XMLConstants;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.Marshaller;
import javax.xml.bind.SchemaOutputResolver;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlAnyElement;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;
import javax.xml.namespace.QName;
import javax.xml.transform.Result;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;

/**
 * Demo of marshalling a very large XML document through JAXB with schema
 * validation switched on, without holding all child elements in memory.
 *
 * <p>The {@link #content} list is a virtual list: it reports a fixed size and
 * builds each child element only when {@code get(int)} is called. The schema
 * used for validation is generated from this class itself, and the marshalled
 * output is thrown away by a writer that ignores everything written to it.
 *
 * <p>With {@link #MISPLACE_HEADER} set to {@code true} the {@code Header}
 * element is produced at the last index instead of the first. Per the original
 * author's note, marshalling then fails with a {@code MarshalException}
 * ("Invalid content was found starting with element 'Header'").
 */
@XmlAccessorType(XmlAccessType.FIELD)
@XmlRootElement(name = "TestHuge")
public class TestHuge {

    /** Selects where the single {@code Header} child goes: last index if {@code true}, index 0 otherwise. */
    static final boolean MISPLACE_HEADER = true;

    /** Number of child elements the virtual {@link #content} list reports. */
    private static final int LIST_SIZE = 20000;

    /** Local name of the header element. */
    static final String HEADER = "Header";

    /** Local name of the data elements. */
    static final String DATA = "Data";

    // NOTE: the declaration order of the three bound fields below is kept as in
    // the original, since the generated schema is derived from this class.

    @XmlElement(name = HEADER)
    String header;

    @XmlElement(name = DATA)
    List<String> data;

    @XmlAnyElement
    List<Object> content;

    /**
     * Generates a schema from this class, attaches it to a marshaller and
     * marshals one instance whose children are created on demand.
     *
     * @param args unused
     * @throws Exception if schema generation or marshalling fails
     */
    public static void main(final String[] args) throws Exception {

        final JAXBContext context = JAXBContext.newInstance(TestHuge.class);

        final Schema generatedSchema = genSchema(context);

        final Marshaller validatingMarshaller = context.createMarshaller();
        validatingMarshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);
        validatingMarshaller.setSchema(generatedSchema);

        final TestHuge root = new TestHuge();
        root.content = root.lazyChildren();

        // throws MarshalException ... Invalid content was found starting with element 'Header'
        validatingMarshaller.marshal(root, new DiscardingWriter());
    }

    /**
     * Builds the virtual child list: {@link #LIST_SIZE} entries, each one
     * created by {@link #createChild(int)} at the moment it is requested.
     *
     * @return a read-only list view that materializes elements on access
     */
    private List<Object> lazyChildren() {
        return new AbstractList<Object>() {

            @Override
            public int size() {
                return LIST_SIZE;
            }

            @Override
            public Object get(final int index) {
                return createChild(index);
            }
        };
    }

    /**
     * Creates the child element for the given position and logs progress on
     * every 1000th index.
     *
     * @param index position of the child within {@link #content}
     * @return a {@code Header} element at the header index, a {@code Data}
     *         element elsewhere; the value is a string built from 1,000,000
     *         default-initialized chars
     */
    private JAXBElement<String> createChild(final int index) {
        final boolean progressTick = index % 1000 == 0;
        if (progressTick) {
            System.out.println("serialized so far: " + index);
        }

        final String elementName;
        if (index == getHeaderIndex(content)) {
            elementName = HEADER;
        } else {
            elementName = DATA;
        }

        // Large payload so that memory pressure would show up if children were retained.
        final char[] filler = new char[1000000];
        final String payload = new String(filler);
        return new JAXBElement<String>(new QName(elementName), String.class, payload);
    }

    /**
     * Returns the index at which the {@code Header} element is emitted.
     *
     * @param list the child list whose size determines the last index
     * @return the last index when {@link #MISPLACE_HEADER} is set, otherwise 0
     */
    private static int getHeaderIndex(final List<?> list) {
        if (MISPLACE_HEADER) {
            return list.size() - 1;
        }
        return 0;
    }

    /**
     * Generates the XML schema for the given context in memory and compiles it
     * into a {@link Schema}.
     *
     * @param jc the JAXB context to generate the schema from
     * @return the compiled schema built from every generated schema document
     * @throws Exception if generation or compilation fails
     */
    private static Schema genSchema(final JAXBContext jc) throws Exception {
        final List<StringWriter> buffers = new ArrayList<>();

        // Capture each generated schema document in its own in-memory buffer.
        jc.generateSchema(new SchemaOutputResolver() {

            @Override
            public Result createOutput(final String namespaceUri, final String suggestedFileName)
                    throws IOException {
                final StringWriter buffer = new StringWriter();
                final StreamResult target = new StreamResult(buffer);
                target.setSystemId("");
                buffers.add(buffer);
                return target;
            }
        });

        final StreamSource[] schemaSources = new StreamSource[buffers.size()];
        int slot = 0;
        for (final StringWriter buffer : buffers) {
            schemaSources[slot++] = new StreamSource(new StringReader(buffer.toString()));
        }

        return SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI).newSchema(schemaSources);
    }

    /** Writer that ignores everything written to it; flush and close do nothing. */
    private static final class DiscardingWriter extends Writer {

        @Override
        public void write(final char[] cbuf, final int off, final int len) throws IOException {
            // intentionally empty: output is discarded
        }

        @Override
        public void write(final int c) throws IOException {
            // intentionally empty: output is discarded
        }

        @Override
        public void flush() throws IOException {
            // nothing buffered
        }

        @Override
        public void close() throws IOException {
            // nothing to release
        }
    }
}
于 2020-08-23T17:54:24.323 回答