我正在尝试使用 XJC/JAXB 创建一个 XSD 模式来处理以下本体:
http://archive.geneontology.org/latest-termdb/go_daily-termdb.rdf-xml.gz
(我知道 RDF 没有架构,但这个文件的格式总是相同的)。该文件如下所示:
<go:go xmlns:go="http://www.geneontology.org/dtds/go.dtd#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:RDF>
<go:term rdf:about="http://www.geneontology.org/go#GO:0000001">
<go:accession>GO:0000001</go:accession>
<go:name>mitochondrion inheritance</go:name>
<go:synonym>mitochondrial inheritance</go:synonym>
<go:definition>The distribution of mitochondria, including the mitochondrial genome, into daughter cells after mitosis or meiosis, mediated by
interactions between mitochondria and the cytoskeleton.</go:definition>
<go:is_a rdf:resource="http://www.geneontology.org/go#GO:0048308" />
<go:is_a rdf:resource="http://www.geneontology.org/go#GO:0048311" />
</go:term>
<go:term rdf:about="http://www.geneontology.org/go#GO:0000002">
(...)
该文件混合使用了两个命名空间:“go:”和“rdf:”。
我试图创建一个 XSD 架构。到目前为止,这是我的 xsd:
go.xsd
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:go="http://www.geneontology.org/dtds/go.dtd#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
elementFormDefault="qualified"
targetNamespace="http://www.geneontology.org/dtds/go.dtd#"
attributeFormDefault="qualified"
>
<!-- Schema for the "go:" namespace. Locally declared elements are namespace-qualified
     (elementFormDefault="qualified"). -->
<!-- The rdf: attribute groups (rdfabout, rdfrsrc, rdfparsetype) and the rdfroot group
     referenced below are declared in the imported rdf.xsd. -->
<xs:import namespace="http://www.w3.org/1999/02/22-rdf-syntax-ns#" schemaLocation="rdf.xsd"/>
<!-- Choice between the five relation elements a term may contain. Each alternative is
     itself optional and repeatable. -->
<xs:group name="is_group">
<xs:choice>
<xs:element name="is_a" type="go:IsA" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="part_of" type="go:PartOf" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="negatively_regulates" type="go:NegativelyRegulates" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="positively_regulates" type="go:PositivelyRegulates" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="regulates" type="go:Regulates" minOccurs="0" maxOccurs="unbounded"/>
</xs:choice>
</xs:group>
<!-- Content of a go:term element: an ordered sequence of accession and name (each
     exactly once), then optional synonym, definition, comment, relation and dbxref
     children. The term also takes the rdf:rdfabout attribute group. -->
<xs:complexType name="Term">
<xs:sequence>
<xs:element name="accession" type="xs:string" minOccurs="1" maxOccurs="1"/>
<xs:element name="name" type="xs:string" minOccurs="1" maxOccurs="1"/>
<xs:element name="synonym" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="definition" type="xs:string" minOccurs="0" maxOccurs="1"/>
<xs:element name="comment" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
<!-- The group reference is repeatable as well, so relation elements of different
     kinds may be interleaved in any order. -->
<xs:group ref="go:is_group" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="dbxref" type="go:DbXRef" minOccurs="0" maxOccurs="unbounded"/>
</xs:sequence>
<xs:attributeGroup ref="rdf:rdfabout"/>
</xs:complexType>
<!-- Zero or more go:term elements, wrapped in a named group so that the RDF type in
     rdf.xsd can reference them from the other namespace. -->
<xs:group name="goterm">
<xs:sequence>
<xs:element name="term" type="go:Term" minOccurs="0" maxOccurs="unbounded" />
</xs:sequence>
</xs:group>
<!-- Abstract base of all relation types: no child elements, only the rdf:rdfrsrc
     attribute group. -->
<xs:complexType name="AbstractRelation" abstract="true">
<xs:attributeGroup ref="rdf:rdfrsrc"/>
</xs:complexType>
<!-- The five concrete relation types below extend AbstractRelation without adding
     any content of their own. -->
<xs:complexType name="IsA">
<xs:complexContent>
<xs:extension base="go:AbstractRelation">
</xs:extension>
</xs:complexContent>
</xs:complexType>
<xs:complexType name="PartOf">
<xs:complexContent>
<xs:extension base="go:AbstractRelation">
</xs:extension>
</xs:complexContent>
</xs:complexType>
<xs:complexType name="NegativelyRegulates">
<xs:complexContent>
<xs:extension base="go:AbstractRelation">
</xs:extension>
</xs:complexContent>
</xs:complexType>
<xs:complexType name="PositivelyRegulates">
<xs:complexContent>
<xs:extension base="go:AbstractRelation">
</xs:extension>
</xs:complexContent>
</xs:complexType>
<xs:complexType name="Regulates">
<xs:complexContent>
<xs:extension base="go:AbstractRelation">
</xs:extension>
</xs:complexContent>
</xs:complexType>
<!-- Content of a go:dbxref element: exactly one database_symbol followed by exactly one
     reference, plus the rdf:rdfparsetype attribute group. -->
<xs:complexType name="DbXRef">
<xs:sequence>
<xs:element name="database_symbol" type="xs:string" minOccurs="1" maxOccurs="1"/>
<xs:element name="reference" type="xs:string" minOccurs="1" maxOccurs="1"/>
</xs:sequence>
<xs:attributeGroup ref="rdf:rdfparsetype"/>
</xs:complexType>
<!-- Type of the document root: its content is the rdf:rdfroot group from rdf.xsd. -->
<xs:complexType name="Go">
<xs:sequence>
<xs:group ref="rdf:rdfroot" />
</xs:sequence>
</xs:complexType>
<!-- Global root element go:go. -->
<xs:element name="go" type="go:Go" />
</xs:schema>
rdf.xsd
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:go="http://www.geneontology.org/dtds/go.dtd#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
targetNamespace="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
elementFormDefault="qualified"
attributeFormDefault="qualified"
>
<!-- Schema for the "rdf:" namespace. Local elements and attributes declared here are
     namespace-qualified (elementFormDefault / attributeFormDefault = "qualified"). -->
<!-- go.xsd is imported back because the RDF type below references the go:goterm group;
     the two schema files therefore import each other. -->
<xs:import namespace="http://www.geneontology.org/dtds/go.dtd#" schemaLocation="go.xsd"/>
<!-- Attribute group providing the optional rdf:about attribute. -->
<xs:attributeGroup name="rdfabout">
<xs:attribute name="about" type="xs:string"/>
</xs:attributeGroup>
<!-- Attribute group providing the optional rdf:resource attribute. -->
<xs:attributeGroup name="rdfrsrc">
<xs:attribute name="resource" type="xs:string"/>
</xs:attributeGroup>
<!-- Attribute group providing rdf:parseType, whose value is fixed to "Resource". -->
<xs:attributeGroup name="rdfparsetype">
<xs:attribute name="parseType" type="xs:string" fixed="Resource" />
</xs:attributeGroup>
<!-- Content of the rdf:RDF element: the go:goterm group declared in go.xsd. -->
<xs:complexType name="RDF">
<xs:sequence>
<xs:group ref="go:goterm" />
</xs:sequence>
</xs:complexType>
<!-- Exactly one rdf:RDF element, wrapped in a named group so that the Go type in
     go.xsd can reference it from the other namespace. -->
<xs:group name="rdfroot">
<xs:sequence>
<xs:element name="RDF" type="rdf:RDF" minOccurs="1" maxOccurs="1"/>
</xs:sequence>
</xs:group>
</xs:schema>
使用 xmllint 针对此架构验证该 RDF 文件可以通过:
$ xmllint --noout --schema go.xsd go.xml
go.xml validates
但是,在用 XJC 编译 xsd 并运行下面的 java 程序后,我没有得到 go:term 的主体:
import java.io.InputStream;
import java.io.StringWriter;
import org.geneontology.dtds.go.*;
import org.w3._1999._02._22_rdf_syntax_ns.*;
import javax.xml.namespace.QName;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.Unmarshaller;
import javax.xml.bind.Marshaller;
import javax.xml.transform.stream.StreamSource;
public class TestGo
{
public static void main(String[] args) throws Exception
{
JAXBContext jaxbCtxt=JAXBContext.newInstance(JAXBContext.newInstance("org.geneontology.dtds.go:org.w3._1999._02._22_rdf_syntax_ns");
Marshaller marshaller = jaxbCtxt.createMarshaller();
Unmarshaller unmarshaller=jaxbCtxt.createUnmarshaller();
marshaller.setProperty("jaxb.formatted.output",true);
Go go=unmarshaller.unmarshal( new StreamSource(System.in), Go.class).getValue();
marshaller.marshal(new JAXBElement<Go>(
new QName("http://www.geneontology.org/dtds/go.dtd#", "go"),
Go.class,
go
), System.out);
}
}
编译&运行:
# xjc needs its output directory to exist.
mkdir -p tmp
xjc -d tmp ../schemas/bio/go/go.xsd
# Compile every generated source explicitly. With -sourcepath alone javac only compiles
# classes reachable from TestGo.java, which skips the generated package-info.java files;
# those carry the @XmlSchema namespace / elementFormDefault settings JAXB needs to bind
# the qualified child elements of go:term.
javac -d tmp -sourcepath tmp:. TestGo.java `find tmp -name "*.java"`
java -cp tmp TestGo < go.xml | head -n 20
输出:
<ns3:go xmlns:ns2="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:ns3="http://www.geneontology.org/dtds/go.dtd#">
<ns2:RDF>
<ns3:term ns2:about="http://www.geneontology.org/go#GO:0000001"/>
<ns3:term ns2:about="http://www.geneontology.org/go#GO:0000002"/>
<ns3:term ns2:about="http://www.geneontology.org/go#GO:0000003"/>
<ns3:term ns2:about="http://www.geneontology.org/go#GO:0000005"/>
<ns3:term ns2:about="http://www.geneontology.org/go#GO:0042254"/>
<ns3:term ns2:about="http://www.geneontology.org/go#GO:0044183"/>
<ns3:term ns2:about="http://www.geneontology.org/go#GO:0051082"/>
<ns3:term ns2:about="http://www.geneontology.org/go#GO:0000006"/>
<ns3:term ns2:about="http://www.geneontology.org/go#GO:0000007"/>
<ns3:term ns2:about="http://www.geneontology.org/go#GO:0000008"/>
<ns3:term ns2:about="http://www.geneontology.org/go#GO:0003756"/>
<ns3:term ns2:about="http://www.geneontology.org/go#GO:0015036"/>
<ns3:term ns2:about="http://www.geneontology.org/go#GO:0000009"/>
哪里出了问题?我怎样才能得到 'go:term' 的主体?这是处理组的正确方法吗?(例如:xsd:group name="goterm" 在我看来很丑。)
编辑:根据 Blaise 的评论更改了 JAXBContext.newInstance。
编辑:Blaise 的第二个 java 代码。
import java.io.InputStream;
import java.io.StringWriter;
import org.geneontology.dtds.go.*;
import org.w3._1999._02._22_rdf_syntax_ns.*;
import javax.xml.namespace.QName;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.Unmarshaller;
import javax.xml.bind.Marshaller;
import javax.xml.transform.stream.StreamSource;
/**
 * Marshalling-only check: builds a {@code Go} document holding two terms in memory
 * and writes it to standard output through JAXB.
 */
public class TestGo2
{
    /** Creates the term numbered {@code n}, setting only its about URI and accession. */
    private static Term buildTerm(int n)
    {
        Term term = new Term();
        term.setAbout("uri:" + n);
        term.setAccession("Hello" + n);
        return term;
    }

    /**
     * Assembles the object graph and marshals it to {@code System.out}.
     *
     * @param args ignored
     * @throws Exception if the JAXB context cannot be created or marshalling fails
     */
    public static void main(String[] args) throws Exception
    {
        RDF rdfRoot = new RDF();
        int n = 0;
        while (n < 2)
        {
            rdfRoot.getTerm().add(buildTerm(n));
            n++;
        }

        Go document = new Go();
        document.setRDF(rdfRoot);

        JAXBContext context =
            JAXBContext.newInstance("org.geneontology.dtds.go:org.w3._1999._02._22_rdf_syntax_ns");
        QName rootName = new QName("http://www.geneontology.org/dtds/go.dtd#", "go");
        JAXBElement<Go> rootElement = new JAXBElement<Go>(rootName, Go.class, document);
        context.createMarshaller().marshal(rootElement, System.out);
    }
}
输出:
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<ns3:go xmlns:ns2="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:ns3="http://www.geneontology.org/dtds/go.dtd#">
<ns2:RDF>
<ns3:term ns2:about="uri:0">
<accession>Hello0</accession>
</ns3:term>
<ns3:term ns2:about="uri:1">
<accession>Hello1</accession>
</ns3:term>
</ns2:RDF>
</ns3:go>
这是我当前的 Makefile:
# Builds the JAXB classes from go.xsd with XJC and runs TestGo on the downloaded ontology.

# JAVA_HOME is derived from the directory of the `java` found on PATH (<that dir>/..).
JAVA_HOME:=$(dir $(shell which java))..
JAVAC=${JAVA_HOME}/bin/javac
JAVA=${JAVA_HOME}/bin/java
# XJC may be overridden from the environment or the command line.
XJC?=${JAVA_HOME}/bin/xjc

# .PHONY has to be declared as a target (':'); '.PHONY=' would only assign a variable.
.PHONY: all go clean

all: go

# Validate (best effort, errors ignored), generate, compile and run.
go: go.xml
	-xmllint --noout --schema ../schemas/bio/go/go.xsd $<
	mkdir -p tmp
	${XJC} -d tmp ../schemas/bio/go/go.xsd
	# Compile every generated source, not only ObjectFactory.java: the generated
	# package-info.java files carry the @XmlSchema namespace settings and are not
	# pulled in through -sourcepath.
	${JAVAC} -d tmp -sourcepath tmp:. TestGo.java `find tmp -name "*.java"`
	${JAVA} -cp tmp TestGo < $< | head -n 20
	#rm -rf tmp

# Download the ontology and strip the DOCTYPE line.
go.xml:
	curl "http://archive.geneontology.org/latest-termdb/go_daily-termdb.rdf-xml.gz" |\
	gunzip -c | grep -v "<!DOCTYPE " > $@

clean:
	rm -rf tmp go.xml