4

我正在使用 VTD-XML 提取 XML 文档中的子元素,同时需要获取父文档根元素上的所有命名空间声明。目前我通过以下方式获取所有命名空间声明:

// Registers every "xmlns:prefix" declaration found anywhere in the document with the AutoPilot,
// so that XPath expressions evaluated through it can use those prefixes.
AutoPilot ap = new AutoPilot( vNav );
int tokenCount = vNav.getTokenCount();
String token = null;
String nsPrefix = null;
String nsUri = null;
for ( int i = 0; i < tokenCount; i++ ) {
    // only tokens whose text starts with "xmlns:" are of interest; the default
    // namespace declaration (plain "xmlns") is deliberately not matched here
    if ( vNav.startsWith( i, "xmlns:" ) ) {
        // build the token string only for matching tokens instead of for every token
        token = vNav.toNormalizedString( i );
        nsPrefix = token.substring( token.indexOf( ":" ) + 1 );
        // the token right after the declaration's name is read as its value (the namespace URI)
        nsUri = vNav.toNormalizedString( i + 1 );
        ap.declareXPathNameSpace( nsPrefix, nsUri );
    }// if
}// for

这样可以得到整个文档中的所有命名空间声明。但是,有没有办法只获取某一个元素的命名空间声明?

4

1 回答 1

3

Here's my class that I use during large XML Processing.

Some notes:

  • for any element, xmlns declarations in use come from this element and from all ancestors
  • when element <elem /> is a sibling of <elem xmlns="x" /> they have different namespaces
  • processing XML is generally stack-based

Code:

import java.util.Deque;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;

import javax.xml.namespace.QName;

import org.springframework.util.xml.SimpleNamespaceContext;

import com.ximpleware.NavException;
import com.ximpleware.VTDNav;

/**
 * Tracks, for the element a {@link VTDNav} cursor is currently positioned on, the namespace
 * bindings in scope and the namespace-qualified path from the document root.
 *
 * <p>The state is stack based: {@link #currentXPath(VTDNav)} has to be called once for every
 * element, in document (depth-first) order. Each call unwinds the stacks to the parent of the
 * current element and then pushes the entries for the current element. Skipping elements while
 * descending leaves the stacks out of step with the document.
 *
 * <p>Instances are stateful; nothing in this class synchronizes access to that state.
 */
public class VtdXmlCurrentState
{
   /* Stack of full paths ("/{ns}a/{ns}b/") of the current element and its ancestors; bottom entry is "/" */
   private final Deque<String> qnames = new LinkedList<String>();
   /*
    * Stack of QNames of the current element and its ancestors. The bottom entry is null (document root),
    * which is why this is a LinkedList - ArrayDeque does not accept null elements.
    */
   private final Deque<QName> names = new LinkedList<QName>();

   // depth of the element handled by the previous call, -1 before the first call
   private int currentDepth = -1;

   // stack of namespace contexts: bottom entry is the empty root context, followed by one entry
   // for each ancestor of the current element and one for the current element itself
   private final Deque<SimpleNamespaceContext> namespaces = new LinkedList<SimpleNamespaceContext>();

   /**
    * Creates the state for a document whose root element has not been visited yet.
    */
   public VtdXmlCurrentState()
   {
      // first a context without any mapping
      this.namespaces.push(new SimpleNamespaceContext());
      // first path is "/"
      this.qnames.push("/");
      this.names.push(null);
   }

   /**
    * Name of the current element.
    *
    * @return qualified name of the element handled by the last {@link #currentXPath(VTDNav)} call,
    *         or {@code null} when no element has been handled yet
    */
   public QName currentElementName()
   {
      return this.names.peek();
   }

   /**
    * Updates the state for the element {@code nav} is positioned on and returns the parent path and
    * the current path of that element.
    *
    * @param nav navigator positioned on an element start token
    * @return two-element array: {@code [0]} is the path of the parent, {@code [1]} the path of the
    *         current element; every path ends with "/"
    * @throws NavException when the navigator fails to read a token
    */
   public String[] currentXPath(VTDNav nav) throws NavException
   {
      // we don't check the end - autopilot handles that
      int depth = nav.getCurrentDepth();
      int idx = nav.getCurrentIndex();

      this.handleNamespaces(nav, depth);

      // name of the current element, read straight from the token (the token is assumed to be an
      // element start). NOTE(review): the 0xffff mask is meant to keep only the qualified-name
      // length of a namespace-aware token - confirm against the VTDNav.getTokenLength documentation.
      String elName = nav.toRawString(nav.getTokenOffset(idx), nav.getTokenLength(idx) & 0xffff);
      SimpleNamespaceContext context = this.namespaces.peek();
      QName qName;
      int colon = elName.indexOf(':');
      if (colon >= 0) {
         qName = new QName(context.getNamespaceURI(elName.substring(0, colon)), elName.substring(colon + 1));
      } else {
         qName = new QName(context.getNamespaceURI(""), elName);
      }

      // drop the entries of the previous element and of every ancestor of it that is not an
      // ancestor of the current element, so that the parent of the current element is on top
      for (int i = 0; i <= this.currentDepth - depth; i++) {
         this.qnames.pop();
         this.names.pop();
      }
      String parentName = this.qnames.peek();
      String fullName = new StringBuilder(parentName).append(qName.toString()).append("/").toString();
      this.qnames.push(fullName);
      this.names.push(qName);

      this.currentDepth = depth;

      return new String[] { parentName, fullName };
   }

   /**
    * Brings the namespace context stack in line with the current element: unwinds to the parent's
    * context and pushes either that same context (no xmlns attributes on the element) or a new
    * context holding the parent's bindings overlaid with the element's own declarations.
    *
    * @param nav navigator positioned on the current element
    * @param depth depth of the current element
    * @throws NavException when the navigator fails to read a token
    */
   private void handleNamespaces(VTDNav nav, int depth) throws NavException
   {
      Map<String, String> declared = this.readNamespaceDeclarations(nav);

      // Unwind exactly like currentXPath() unwinds the name stacks. The context inherited by the
      // current element must be the parent's - never the one of a preceding sibling, whose own
      // xmlns declarations are not in scope here.
      for (int i = 0; i <= this.currentDepth - depth; i++) {
         this.namespaces.pop();
      }
      SimpleNamespaceContext parent = this.namespaces.peek();

      if (declared == null) {
         // no declarations of its own - the element shares the parent's context instance
         this.namespaces.push(parent);
         return;
      }

      // the element declares namespaces: start from a copy of the parent's bindings ...
      SimpleNamespaceContext own = new SimpleNamespaceContext();
      for (Iterator<?> prefixes = parent.getBoundPrefixes(); prefixes.hasNext();) {
         String pfx = (String)prefixes.next();
         own.bindNamespaceUri(pfx, parent.getNamespaceURI(pfx));
      }
      // the default namespace is copied explicitly in addition to the bound prefixes
      own.bindNamespaceUri("", parent.getNamespaceURI(""));
      // ... and add (overwriting!) the element's own declarations
      own.setBindings(declared);
      this.namespaces.push(own);
   }

   /**
    * Collects the xmlns / xmlns:prefix attributes of the element the navigator is positioned on.
    *
    * @param nav navigator positioned on the current element
    * @return prefix-to-URI map ("" is the key of the default namespace), or {@code null} when the
    *         element carries no namespace declaration
    * @throws NavException when the navigator fails to read a token
    */
   private Map<String, String> readNamespaceDeclarations(VTDNav nav) throws NavException
   {
      Map<String, String> declared = null;
      int total = nav.getTokenCount();
      int index = nav.getCurrentIndex() + 1;
      // the bound is re-checked before every token read, so an element whose attributes are the
      // last tokens of the document does not make the scan run past the token buffer
      while (index < total) {
         int type = nav.getTokenType(index);
         if (type == VTDNav.TOKEN_ATTR_NS) {
            // strip "xmlns" and, for a prefixed declaration, the ":" that follows it
            String prefix = nav.toString(index).substring(5);
            if (prefix.length() > 0) {
               prefix = prefix.substring(1);
            }
            if (declared == null) {
               declared = new HashMap<String, String>();
            }
            declared.put(prefix, nav.toString(index + 1));
            // skip the declaration together with its value
            index += 2;
         } else if (type == VTDNav.TOKEN_ATTR_NAME) {
            // quickly skip a non-xmlns attribute together with its value
            index += 2;
         } else if (type == VTDNav.TOKEN_ATTR_VAL) {
            index++;
         } else {
            // first token that is not an attribute - the attributes of this element are done
            break;
         }
      }
      return declared;
   }
}

Input XML:

<?xml version="1.0" encoding="UTF-8"?>

<set id="#1" xmlns="urn:test:1.0">
   <documents xmlns="urn:test:1.0">
      <doc xmlns="urn:test:1.1" />
      <doc />
      <doc xmlns="urn:test:1.2" />
   </documents>
   <documents />
   <documents xmlns="" />
</set>

Using the class:

// Read the sample document and parse it; parse(true) is passed so that xmlns attributes can be
// told apart from ordinary attributes (see the VTDGen documentation).
byte[] xmlBytes = FileCopyUtils.copyToByteArray(super.createResource("dom03.xml").getInputStream());

VTDGen generator = new VTDGen();
generator.setDoc(xmlBytes);
generator.parse(true);

// Walk every element of the document, whatever its namespace or local name.
VTDNav nav = generator.getNav();
AutoPilot ap = new AutoPilot();
ap.bind(nav);
ap.selectElementNS("*", "*");

VtdXmlCurrentState state = new VtdXmlCurrentState();

// Paths expected for the elements, in the order in which the AutoPilot visits them.
String[] expectedPaths = {
    "/{urn:test:1.0}set/",
    "/{urn:test:1.0}set/{urn:test:1.0}documents/",
    "/{urn:test:1.0}set/{urn:test:1.0}documents/{urn:test:1.1}doc/",
    "/{urn:test:1.0}set/{urn:test:1.0}documents/{urn:test:1.0}doc/",
    "/{urn:test:1.0}set/{urn:test:1.0}documents/{urn:test:1.2}doc/",
    "/{urn:test:1.0}set/{urn:test:1.0}documents/",
    "/{urn:test:1.0}set/documents/"
};
for (String expectedPath : expectedPaths) {
    ap.iterate();
    assertEquals(expectedPath, state.currentXPath(nav)[1]);
}
// no element is left after the last expected one
assertFalse(ap.iterate());
于 2012-01-09T15:03:29.733 回答