2

我正在尝试使用 XJC/JAXB 创建一个 XSD 模式来处理以下本体:

http://archive.geneontology.org/latest-termdb/go_daily-termdb.rdf-xml.gz

(我知道 RDF 没有架构,但这个文件的格式总是相同的)。该文件如下所示:

<go:go xmlns:go="http://www.geneontology.org/dtds/go.dtd#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:RDF>
        <go:term rdf:about="http://www.geneontology.org/go#GO:0000001">
            <go:accession>GO:0000001</go:accession>
            <go:name>mitochondrion inheritance</go:name>
            <go:synonym>mitochondrial inheritance</go:synonym>
            <go:definition>The distribution of mitochondria, including the mitochondrial genome, into daughter cells after mitosis or meiosis, mediated by
 interactions between mitochondria and the cytoskeleton.</go:definition>
            <go:is_a rdf:resource="http://www.geneontology.org/go#GO:0048308" />
            <go:is_a rdf:resource="http://www.geneontology.org/go#GO:0048311" />
        </go:term>
        <go:term rdf:about="http://www.geneontology.org/go#GO:0000002">
       (...)

该文件混合使用了两个命名空间“go:”和“rdf:”。

我试图创建一个 XSD 架构。到目前为止,这是我的 xsd:

go.xsd

<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:go="http://www.geneontology.org/dtds/go.dtd#"
    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    elementFormDefault="qualified"
    targetNamespace="http://www.geneontology.org/dtds/go.dtd#"
    attributeFormDefault="qualified"
    >

<xs:import namespace="http://www.w3.org/1999/02/22-rdf-syntax-ns#" schemaLocation="rdf.xsd"/>

<xs:group name="is_group">
  <xs:choice>
      <xs:element name="is_a" type="go:IsA" minOccurs="0" maxOccurs="unbounded"/>
      <xs:element name="part_of" type="go:PartOf" minOccurs="0" maxOccurs="unbounded"/>
      <xs:element name="negatively_regulates" type="go:NegativelyRegulates" minOccurs="0" maxOccurs="unbounded"/>
      <xs:element name="positively_regulates" type="go:PositivelyRegulates" minOccurs="0" maxOccurs="unbounded"/>
      <xs:element name="regulates" type="go:Regulates" minOccurs="0" maxOccurs="unbounded"/>
  </xs:choice>
</xs:group>


  <xs:complexType name="Term">
    <xs:sequence>
      <xs:element name="accession" type="xs:string" minOccurs="1" maxOccurs="1"/>
      <xs:element name="name" type="xs:string" minOccurs="1" maxOccurs="1"/>
      <xs:element name="synonym" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
      <xs:element name="definition" type="xs:string" minOccurs="0" maxOccurs="1"/>
      <xs:element name="comment" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
      <xs:group ref="go:is_group" minOccurs="0" maxOccurs="unbounded"/>
      <xs:element name="dbxref" type="go:DbXRef" minOccurs="0" maxOccurs="unbounded"/>
    </xs:sequence>
    <xs:attributeGroup ref="rdf:rdfabout"/>
  </xs:complexType>


  <xs:group name="goterm">
    <xs:sequence>
       <xs:element name="term" type="go:Term"  minOccurs="0" maxOccurs="unbounded" />
    </xs:sequence>
  </xs:group>


  <xs:complexType name="AbstractRelation" abstract="true">
    <xs:attributeGroup ref="rdf:rdfrsrc"/>
  </xs:complexType>

  <xs:complexType name="IsA">
        <xs:complexContent>
        <xs:extension base="go:AbstractRelation">
        </xs:extension>
    </xs:complexContent>
  </xs:complexType>

  <xs:complexType name="PartOf">
        <xs:complexContent>
        <xs:extension base="go:AbstractRelation">
        </xs:extension>
    </xs:complexContent>
  </xs:complexType>

  <xs:complexType name="NegativelyRegulates">
        <xs:complexContent>
        <xs:extension base="go:AbstractRelation">
        </xs:extension>
    </xs:complexContent>
  </xs:complexType>

  <xs:complexType name="PositivelyRegulates">
        <xs:complexContent>
        <xs:extension base="go:AbstractRelation">
        </xs:extension>
    </xs:complexContent>
  </xs:complexType>

  <xs:complexType name="Regulates">
        <xs:complexContent>
        <xs:extension base="go:AbstractRelation">
        </xs:extension>
    </xs:complexContent>
  </xs:complexType>



  <xs:complexType name="DbXRef">
    <xs:sequence>
      <xs:element name="database_symbol" type="xs:string" minOccurs="1" maxOccurs="1"/>
      <xs:element name="reference" type="xs:string" minOccurs="1" maxOccurs="1"/>
    </xs:sequence>
    <xs:attributeGroup ref="rdf:rdfparsetype"/>
  </xs:complexType>


  <xs:complexType name="Go">
    <xs:sequence>
      <xs:group ref="rdf:rdfroot" />
    </xs:sequence>
  </xs:complexType>

<xs:element name="go" type="go:Go" />

</xs:schema>

rdf.xsd

<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:go="http://www.geneontology.org/dtds/go.dtd#"
    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    targetNamespace="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    elementFormDefault="qualified"
     attributeFormDefault="qualified"
    >
  <xs:import namespace="http://www.geneontology.org/dtds/go.dtd#" schemaLocation="go.xsd"/>
  <xs:attributeGroup name="rdfabout">
    <xs:attribute name="about" type="xs:string"/>
  </xs:attributeGroup>

  <xs:attributeGroup name="rdfrsrc">
    <xs:attribute name="resource" type="xs:string"/>
  </xs:attributeGroup>

  <xs:attributeGroup name="rdfparsetype">
    <xs:attribute name="parseType" type="xs:string" fixed="Resource" />
  </xs:attributeGroup>



  <xs:complexType name="RDF">
    <xs:sequence>
      <xs:group ref="go:goterm" />
    </xs:sequence>
  </xs:complexType>


<xs:group name="rdfroot">
  <xs:sequence>
      <xs:element name="RDF" type="rdf:RDF" minOccurs="1" maxOccurs="1"/>
  </xs:sequence>
</xs:group>

</xs:schema>

使用 xmllint 针对此架构验证该 RDF 文件,结果验证通过:

$ xmllint  --noout --schema go.xsd go.xml
go.xml validates

但是,在用 XJC 编译 xsd 并运行下面的 java 程序后,我没有得到 go:term 的主体:

import java.io.InputStream;
import java.io.StringWriter;
import org.geneontology.dtds.go.*;
import org.w3._1999._02._22_rdf_syntax_ns.*;
import javax.xml.namespace.QName;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.Unmarshaller;
import javax.xml.bind.Marshaller;
import  javax.xml.transform.stream.StreamSource;


public class TestGo
    {
    public static void main(String[] args) throws Exception
        {
    JAXBContext jaxbCtxt=JAXBContext.newInstance(JAXBContext.newInstance("org.geneontology.dtds.go:org.w3._1999._02._22_rdf_syntax_ns");
    Marshaller marshaller = jaxbCtxt.createMarshaller();
    Unmarshaller unmarshaller=jaxbCtxt.createUnmarshaller();
        marshaller.setProperty("jaxb.formatted.output",true);
        Go go=unmarshaller.unmarshal( new StreamSource(System.in), Go.class).getValue();
        marshaller.marshal(new JAXBElement<Go>(
            new QName("http://www.geneontology.org/dtds/go.dtd#", "go"),
            Go.class,
            go
            ), System.out);
        }
    }

编译&运行:

xjc -d tmp ../schemas/bio/go/go.xsd
javac -d tmp -sourcepath tmp:. TestGo.java
java -cp tmp TestGo < go.xml | head -n 20

输出:

<ns3:go xmlns:ns2="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:ns3="http://www.geneontology.org/dtds/go.dtd#">
    <ns2:RDF>
        <ns3:term ns2:about="http://www.geneontology.org/go#GO:0000001"/>
        <ns3:term ns2:about="http://www.geneontology.org/go#GO:0000002"/>
        <ns3:term ns2:about="http://www.geneontology.org/go#GO:0000003"/>
        <ns3:term ns2:about="http://www.geneontology.org/go#GO:0000005"/>
        <ns3:term ns2:about="http://www.geneontology.org/go#GO:0042254"/>
        <ns3:term ns2:about="http://www.geneontology.org/go#GO:0044183"/>
        <ns3:term ns2:about="http://www.geneontology.org/go#GO:0051082"/>
        <ns3:term ns2:about="http://www.geneontology.org/go#GO:0000006"/>
        <ns3:term ns2:about="http://www.geneontology.org/go#GO:0000007"/>
        <ns3:term ns2:about="http://www.geneontology.org/go#GO:0000008"/>
        <ns3:term ns2:about="http://www.geneontology.org/go#GO:0003756"/>
        <ns3:term ns2:about="http://www.geneontology.org/go#GO:0015036"/>
        <ns3:term ns2:about="http://www.geneontology.org/go#GO:0000009"/>

哪里出了问题?我怎样才能得到 'go:term' 的内容(即它的子元素)?另外,这样使用 group 是正确的做法吗(例如:xsd:group name="goterm" 在我看来很不优雅)?

编辑:根据 Blaise 的评论更改了 JAXBContext.newInstance。

编辑:针对 Blaise 的评论,补充第二个 Java 程序。

import java.io.InputStream;
import java.io.StringWriter;
import org.geneontology.dtds.go.*;
import org.w3._1999._02._22_rdf_syntax_ns.*;
import javax.xml.namespace.QName;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.Unmarshaller;
import javax.xml.bind.Marshaller;
import  javax.xml.transform.stream.StreamSource;


public class TestGo2
    {
    public static void main(String[] args) throws Exception
        {
        Go go=new Go();
        RDF rdf=new RDF();
        go.setRDF(rdf);
        for(int i=0;i<2;++i)
            {
            Term t=new Term();
            t.setAbout("uri:"+i);
            t.setAccession("Hello"+i);
            rdf.getTerm().add(t);
            }

    JAXBContext jaxbCtxt=JAXBContext.newInstance("org.geneontology.dtds.go:org.w3._1999._02._22_rdf_syntax_ns");
    Marshaller marshaller = jaxbCtxt.createMarshaller();

        marshaller.marshal(new JAXBElement<Go>(
            new QName("http://www.geneontology.org/dtds/go.dtd#", "go"),
            Go.class,
            go
            ), System.out);
        }
    }

输出:

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<ns3:go xmlns:ns2="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:ns3="http://www.geneontology.org/dtds/go.dtd#">
<ns2:RDF>
<ns3:term ns2:about="uri:0">
 <accession>Hello0</accession>
</ns3:term>
<ns3:term ns2:about="uri:1">
  <accession>Hello1</accession>
</ns3:term>
</ns2:RDF>
</ns3:go>

这是我当前的 Makefile:

JAVA_HOME:=$(dir $(shell which java))..
JAVAC=${JAVA_HOME}/bin/javac
JAVA=${JAVA_HOME}/bin/java
XJC?=${JAVA_HOME}/bin/xjc

.PHONY=all go clean
all:go

go: go.xml
    -xmllint  --noout --schema ../schemas/bio/go/go.xsd $<
    mkdir -p tmp
    ${XJC} -d tmp ../schemas/bio/go/go.xsd 
    ${JAVAC} -d tmp -sourcepath tmp:. TestGo.java `find tmp -name "ObjectFactory.java"`
    ${JAVA}  -cp tmp TestGo < $< | head -n 20
    #rm -rf tmp

go.xml:
    curl "http://archive.geneontology.org/latest-termdb/go_daily-termdb.rdf-xml.gz" |\
    gunzip -c | grep -v "<!DOCTYPE " > $@

clean:
    rm -rf tmp go.xml
4

1 回答 1

1

首先可以尝试的是:用以冒号分隔的、由各个生成包的包名组成的 String 来创建 JAXBContext,而不是用其中某一个类来创建:

JAXBContext jc = JAXBContext.newInstance("org.geneontology.dtds.go:org.w3._1999._02._22_rdf_syntax_ns");

演示

import java.io.File;
import javax.xml.bind.*;

public class Demo {

    public static void main(String[] args) throws Exception {
        JAXBContext jc = JAXBContext.newInstance("org.geneontology.dtds.go:org.w3._1999._02._22_rdf_syntax_ns");

        Unmarshaller unmarshaller = jc.createUnmarshaller();
        File xml = new File("src/forum14236188/input.xml");
        Object o = unmarshaller.unmarshal(xml);

        Marshaller marshaller = jc.createMarshaller();
        marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
        marshaller.marshal(o, System.out);
    }

}

input.xml/输出

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<go xmlns="http://www.geneontology.org/dtds/go.dtd#" xmlns:ns2="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <ns2:RDF>
        <term ns2:about="http://www.geneontology.org/go#GO:0000001">
            <accession>GO:0000001</accession>
            <name>mitochondrion inheritance</name>
            <synonym>mitochondrial inheritance</synonym>
            <definition>The distribution of mitochondria, including the
                mitochondrial genome, into daughter cells after mitosis or meiosis,
                mediated by
                interactions between mitochondria and the cytoskeleton.
            </definition>
            <is_a ns2:resource="http://www.geneontology.org/go#GO:0048308"/>
            <is_a ns2:resource="http://www.geneontology.org/go#GO:0048311"/>
        </term>
    </ns2:RDF>
</go>
于 2013-01-09T13:32:50.367 回答