0

我正在用 SAX 解析一个 XML,到目前为止一切都很好,但是这个 xml 是一个称为行集的微软模式,因此每列都被命名为 c0、c1、c2 等等,但是列的实际名称存储在模式中,它在同一个 XML 文件中。

请看下面的例子:

    <?xml version="1.0" encoding="utf-8"?>
<RS xmlns="urn:schemas-microsoft-com:xml-analysis:rowset">
    <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:saw-sql="urn:saw-sql" targetNamespace="urn:schemas-microsoft-com:xml-analysis:rowset">
        <xsd:complexType name="R">
            <xsd:sequence>
                <xsd:element name="C0" type="xsd:string" minOccurs="1" maxOccurs="1" saw-sql:type="varchar" saw-sql:columnHeading="Opportunity ID" saw-sql:columnID="c55007aeb4fd2c909"/>
                <xsd:element name="C1" type="xsd:string" minOccurs="0" maxOccurs="1" saw-sql:type="varchar" saw-sql:columnHeading="Opportunity Name" saw-sql:columnID="c9e466deb3ad5a949"/>
                <xsd:element name="C2" type="xsd:string" minOccurs="0" maxOccurs="1" saw-sql:type="varchar" saw-sql:columnHeading="Closed Flag" saw-sql:columnID="c4f1b71539820e65c"/>
                <xsd:element name="C3" type="xsd:dateTime" minOccurs="0" maxOccurs="1" saw-sql:type="timestamp" saw-sql:columnHeading="Actual Close Date" saw-sql:columnID="c007ad0eb3ebae22d"/>
                <xsd:element name="C4" type="xsd:dateTime" minOccurs="0" maxOccurs="1" saw-sql:type="timestamp" saw-sql:columnHeading="Creation Date" saw-sql:columnID="c8bf417f59e742c63"/>
                <xsd:element name="C5" type="xsd:double" minOccurs="0" maxOccurs="1" saw-sql:type="double" saw-sql:columnHeading="Line Win Probability" saw-sql:columnID="c49c9eb926449bccc"/>
                <xsd:element name="C6" type="xsd:string" minOccurs="0" maxOccurs="1" saw-sql:type="varchar" saw-sql:columnHeading="First Name" saw-sql:columnID="c7c2e5dc4832c9e81"/>
                <xsd:element name="C7" type="xsd:string" minOccurs="0" maxOccurs="1" saw-sql:type="varchar" saw-sql:columnHeading="Last Name" saw-sql:columnID="c0fc239ab56275027"/>
                <xsd:element name="C8" type="xsd:string" minOccurs="1" maxOccurs="1" saw-sql:type="varchar" saw-sql:columnHeading="Current Base Resource Name" saw-sql:columnID="c7c53a4daa37f1637"/>
                <xsd:element name="C9" type="xsd:string" minOccurs="1" maxOccurs="1" saw-sql:type="varchar" saw-sql:columnHeading="Current Level - 1 Resource Name" saw-sql:columnID="ccb4a337a024e6838"/>
                <xsd:element name="C10" type="xsd:string" minOccurs="1" maxOccurs="1" saw-sql:type="varchar" saw-sql:columnHeading="Current Level - 2 Resource Name" saw-sql:columnID="cfbde2b815500dd87"/>
                <xsd:element name="C11" type="xsd:string" minOccurs="1" maxOccurs="1" saw-sql:type="varchar" saw-sql:columnHeading="Current Level - 3 Resource Name" saw-sql:columnID="cef1347354936a6a8"/>
                <xsd:element name="C12" type="xsd:string" minOccurs="1" maxOccurs="1" saw-sql:type="varchar" saw-sql:columnHeading="Current Level - 4 Resource Name" saw-sql:columnID="cbdc3fbd438cafb0f"/>
                <xsd:element name="C13" type="xsd:string" minOccurs="1" maxOccurs="1" saw-sql:type="varchar" saw-sql:columnHeading="Current Top-Level Resource Name" saw-sql:columnID="c7d7bc50e00cb94ae"/>
                <xsd:element name="C14" type="xsd:string" minOccurs="0" maxOccurs="1" saw-sql:type="varchar" saw-sql:columnHeading="Name" saw-sql:columnID="c08c8bd262c9cfaac"/>
                <xsd:element name="C15" type="xsd:int" minOccurs="0" maxOccurs="1" saw-sql:type="integer" saw-sql:columnHeading="# Days in Stage" saw-sql:columnID="c5b0dbd396ffe3a62"/>
                <xsd:element name="C16" type="xsd:double" minOccurs="0" maxOccurs="1" saw-sql:type="double" saw-sql:columnHeading="Revenue" saw-sql:columnID="cdc65489635ccdda0"/>
                <xsd:element name="C17" type="xsd:dateTime" minOccurs="0" maxOccurs="1" saw-sql:type="timestamp" saw-sql:columnHeading="Close Date" saw-sql:columnID="c4bd9fe19ff33072d"/>
                <xsd:element name="C18" type="xsd:string" minOccurs="0" maxOccurs="1" saw-sql:type="varchar" saw-sql:columnHeading="Opportunity Number" saw-sql:columnID="c6429e0148a929b79"/>
            </xsd:sequence>
        </xsd:complexType>
    </xsd:schema>
    <R>
        <C0>100000009829205</C0>
        <C1>Upgrading Network Servers</C1>
        <C2>Y</C2>
        <C3>2013-01-16T00:00:00</C3>
        <C4>2012-05-01T00:00:00</C4>
        <C5>100</C5>
        <C6>Lisa</C6>
        <C7>Jones</C7>
        <C8>Pepe papas</C8>
        <C9>Pepe papas</C9>
        <C10>Pepe papas</C10>
        <C11>Pepe papas</C11>
        <C12>Pepe papas</C12>
        <C13>Perro Hunter</C13>
        <C14>Amazing customer (SEATTLE, US)</C14>
        <C15></C15>
        <C16></C16>
        <C17></C17>
        <C18>RUP3-Opty-1</C18>
    </R>
    ...

可以看到,colum 的实际名称存储在模式的 saw-sql:columnHeading 属性中,我可以将 name 属性与列标记名匹配,但我不知道如何从 SAX 访问模式处理程序,

到目前为止我有这个

try {

            SAXParserFactory factory = SAXParserFactory.newInstance();
            SAXParser saxParser = factory.newSAXParser();

            DefaultHandler handler = new DefaultHandler() {

                boolean bfname = false;
                boolean blname = false;
                boolean bnname = false;
                boolean bsalary = false;

                public void startElement(String uri, String localName, String qName,
                                         Attributes attributes) throws SAXException {
                    System.out.println("Colum :" + qName);
                }

                public void characters(char ch[], int start, int length) throws SAXException {
                    String value = new String(ch, start, length);
                    System.out.println("value: "+ value);
                }

            };

            InputSource is = new InputSource(new StringReader(xmlContent));
            is.setEncoding("UTF-8");

            saxParser.parse(is, handler);


        } catch (Exception e) {
            e.printStackTrace();
        }

有任何想法吗?

4

1 回答 1

1

SAX 处理程序将按startElement文档顺序记录事件。第一个将是RS, then xsd:schema, thenxsd:complexType等。您需要跟踪每个事件并编写计算表所在位置的逻辑。SAX 是一个低级解析器,默认情况下不保留文档结构。

我希望您将拥有一个用于架构的索引本地数据结构(可能是一个表),并且您将为每一列添加一个条目。然后对于读取的每一行,您将在架构中查找详细信息(例如,通过 getColumn("C1") 等)

对于小问题,您可能希望将整个文档读入 DOM(我使用 XOM),而 XPath 可用于使引用易于编写。

于 2013-04-16T18:48:53.863 回答