我有一个 XML 文档,我需要解析以便为每个项目提取以下值:
<title>
<description>
<articleDate>
<story>
<author>
<photo>
<caption>
我正在尝试使用 DOM,但不知道该如何下手,因为每个
<item>
节点(位于根元素
<channel>
之下)都有 3 个子节点:
<article>
<media>
<link>
我有一个 XML 文档,我需要解析以便为每个项目提取以下值:
<title>
<description>
<articleDate>
<story>
<author>
<photo>
<caption>
我正在尝试使用 DOM,但不知道该如何下手,因为每个
<item>
节点(位于根元素
<channel>
之下)都有 3 个子节点:
<article>
<media>
<link>
也许这个链接会有帮助:http://www.mkyong.com/java/how-to-modify-xml-file-in-java-dom-parser/
另一个例子:
// Reads the private file "YourFile.xml" into a string, then walks it with an
// XmlPullParser: start/end tags are printed, and every text node is both
// printed and collected into userData.
final String xmlFile = "YourFile.xml";
ArrayList<String> userData = new ArrayList<String>();
FileInputStream fis = null;
InputStreamReader isr = null;
String data = null;
try {
    fis = c.openFileInput(xmlFile);
    isr = new InputStreamReader(fis);
    // Read until EOF. Sizing a char buffer with available() (a byte count) and
    // calling read() once can truncate the text or leave trailing NUL chars.
    StringBuilder contents = new StringBuilder();
    char[] inputBuffer = new char[4096];
    int charsRead;
    while ((charsRead = isr.read(inputBuffer)) != -1) {
        contents.append(inputBuffer, 0, charsRead);
    }
    data = contents.toString();
} catch (FileNotFoundException e3) {
    e3.printStackTrace();
} catch (IOException e) {
    e.printStackTrace();
} finally {
    // Close in finally so the file is released even when reading fails.
    try {
        if (isr != null) {
            isr.close(); // also closes the wrapped FileInputStream
        } else if (fis != null) {
            fis.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
// Only parse when the file was actually read; otherwise there is nothing to feed the parser.
if (data != null) {
    // One try block for the whole parse: any parser failure ends the loop instead
    // of leaving eventType unchanged and spinning forever.
    try {
        XmlPullParserFactory factory = XmlPullParserFactory.newInstance();
        factory.setNamespaceAware(true);
        XmlPullParser xpp = factory.newPullParser();
        xpp.setInput(new StringReader(data));
        int eventType = xpp.getEventType();
        while (eventType != XmlPullParser.END_DOCUMENT) {
            if (eventType == XmlPullParser.START_DOCUMENT) {
                System.out.println("Start document");
            } else if (eventType == XmlPullParser.START_TAG) {
                System.out.println("Start tag " + xpp.getName());
            } else if (eventType == XmlPullParser.END_TAG) {
                System.out.println("End tag " + xpp.getName());
            } else if (eventType == XmlPullParser.TEXT) {
                userData.add(xpp.getText());
                System.out.println(xpp.getText());
            }
            eventType = xpp.next();
        }
    } catch (XmlPullParserException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
}
<item> 是你的根节点。你需要使用 SAX 解析器来解析这个 XML。请看这里
我使用 SAX 取得了很大的成功,它既快速又简单。我从一些演示基于模板的解析器生成器的网站中得到了这个简单且“递归”方法的想法。我只是手动完成,但从“DefaultHandler”类扩展。
这里真正的诀窍是:每种对象类型都有自己的“DefaultHandler”扩展类,它只知道如何解析该类型本身。这使得解析器的重构和复用成为可能。然后,每当遇到某种类型的新子“元素”开始时,就创建一个空的子容器,并为它创建对应的“DefaultHandler”扩展类;再把当前处理程序切换为该类型的处理程序,让它递归地解析下去。
我也可以对此进行改进,例如将一些常用方法推到我自己的扩展“DefaultHandler”的基类中。我还创建了“IXMLSerializer”接口,以便我可以快速写出这些。也可能有更好的方法。
基于此,我想出了一个策略,首先为每种元素类型创建对象......
/**
 * Data holder for a {@code Collection} element: a single identity plus lists of
 * properties, item references, spores, regions and backpack items.
 *
 * <p>Every list accessor creates its list on first use and hands back the live
 * list, so callers add entries with e.g. {@code getProperty().add(newItem)};
 * that is why the lists have no setters.
 */
public class CollectionType implements IXMLSerializer{
    public static final String TYPE_TAG = "Collection";
    protected IdentityType identity;
    protected List<PropertyType> property;
    protected List<ItemReferenceType> itemReference;
    protected List<SporeType> spore;
    protected List<RegionType> region;
    protected List<BackpackItemType> backpackItem;

    /**
     * Returns the identity of this collection.
     *
     * @return the current {@link IdentityType}, or {@code null} if none was set
     */
    public IdentityType getIdentity() {
        return identity;
    }

    /**
     * Replaces the identity of this collection.
     *
     * @param value the {@link IdentityType} to store
     */
    public void setIdentity(IdentityType value) {
        this.identity = value;
    }

    /**
     * Returns the live list of {@link PropertyType} entries, creating it on first access.
     *
     * @return the backing list (never {@code null}); changes to it are kept by this object
     */
    public List<PropertyType> getProperty() {
        if (property != null) {
            return property;
        }
        property = new ArrayList<PropertyType>();
        return property;
    }

    /**
     * Returns the live list of {@link ItemReferenceType} entries, creating it on first access.
     *
     * @return the backing list (never {@code null}); changes to it are kept by this object
     */
    public List<ItemReferenceType> getItemReference() {
        if (itemReference != null) {
            return itemReference;
        }
        itemReference = new ArrayList<ItemReferenceType>();
        return itemReference;
    }

    /**
     * Returns the live list of {@link SporeType} entries, creating it on first access.
     *
     * @return the backing list (never {@code null}); changes to it are kept by this object
     */
    public List<SporeType> getSpore() {
        if (spore != null) {
            return spore;
        }
        spore = new ArrayList<SporeType>();
        return spore;
    }

    /**
     * Returns the live list of {@link RegionType} entries, creating it on first access.
     *
     * @return the backing list (never {@code null}); changes to it are kept by this object
     */
    public List<RegionType> getRegion() {
        if (region != null) {
            return region;
        }
        region = new ArrayList<RegionType>();
        return region;
    }

    /**
     * Returns the live list of {@link BackpackItemType} entries, creating it on first access.
     *
     * @return the backing list (never {@code null}); changes to it are kept by this object
     */
    public List<BackpackItemType> getBackpackItem() {
        if (backpackItem != null) {
            return backpackItem;
        }
        backpackItem = new ArrayList<BackpackItemType>();
        return backpackItem;
    }

    /**
     * Writes this collection as a {@link #TYPE_TAG} element: the identity first,
     * then each property, item reference, spore, region and backpack item, in
     * that order, each serialized by its own {@code serializeType}.
     *
     * @param serializer the serializer to write to
     * @param namespace  the namespace used for the start and end tags and passed to the children
     */
    @Override
    public void serializeType(XmlSerializer serializer, String namespace)
            throws IllegalArgumentException, IllegalStateException, IOException {
        serializer.startTag(namespace, TYPE_TAG);
        getIdentity().serializeType(serializer, namespace);
        for (PropertyType child : getProperty()) {
            child.serializeType(serializer, namespace);
        }
        for (ItemReferenceType child : getItemReference()) {
            child.serializeType(serializer, namespace);
        }
        for (SporeType child : getSpore()) {
            child.serializeType(serializer, namespace);
        }
        for (RegionType child : getRegion()) {
            child.serializeType(serializer, namespace);
        }
        for (BackpackItemType child : getBackpackItem()) {
            child.serializeType(serializer, namespace);
        }
        serializer.endTag(namespace, TYPE_TAG);
    }
}
然后,我创建了一个顶级文档处理程序......
/**
 * Top-level SAX handler. It recognises the {@code Collection}, {@code Request}
 * and {@code Response} elements, creates the matching result object, and hands
 * the XML reader over to a type-specific child handler for that element.
 */
public class DocumentHandler extends DefaultHandler {
    public static final String TAG = DocumentHandler.class.getSimpleName();
    private static final boolean mDebugLogging = true;
    SAXParserFactory mSaxParserFactory;
    SAXParser mSaxParser;
    XMLReader mXmlReader;
    /** Stack of element names, shared with the child handlers. */
    Stack<String> mElementStack = new Stack<String>();
    private CollectionType mCollection = null;
    private RequestType mRequest = null;
    private ResponseType mResponse = null;

    /**
     * Creates a namespace-aware SAX parser and keeps its {@link XMLReader} for later parses.
     *
     * @throws SAXException if the parser or its reader cannot be created
     * @throws ParserConfigurationException if the factory cannot satisfy the configuration
     */
    public DocumentHandler() throws SAXException, ParserConfigurationException{
        mSaxParserFactory = SAXParserFactory.newInstance();
        mSaxParserFactory.setNamespaceAware(true); // Must be set true, otherwise no attributes!
        mSaxParser = mSaxParserFactory.newSAXParser();
        if (mDebugLogging) {
            Log.d(TAG, "SAX parser namespace aware? " + mSaxParser.isNamespaceAware());
            Log.d(TAG, "SAX parser is validating? " + mSaxParser.isValidating());
        }
        mXmlReader = mSaxParser.getXMLReader();
    }

    /** Logs the end of the document when debug logging is on. */
    @Override
    public void endDocument() throws SAXException {
        if (mDebugLogging) {
            // Previously logged "Beginning Document Parse" here (messages were swapped).
            Log.d(TAG, "Ending Document Parse");
        }
        super.endDocument();
    }

    /** Logs the start of the document when debug logging is on. */
    @Override
    public void startDocument() throws SAXException {
        if (mDebugLogging) {
            // Previously logged "Ending Document Parse" here (messages were swapped).
            Log.d(TAG, "Beginning Document Parse");
        }
        super.startDocument();
    }

    /**
     * Dispatches on the element name: for a known element, creates the result
     * object, pushes a marker on the element stack and makes the matching child
     * handler the reader's content handler. Unknown elements are ignored.
     */
    @Override
    public void startElement(java.lang.String uri, java.lang.String localName, java.lang.String qName, Attributes attributes) throws SAXException {
        if (qName.equals("Collection")) {
            mCollection = new CollectionType();
            final DefaultHandler handler = new CollectionHandler(mElementStack, mCollection, attributes, mXmlReader, this);
            // NOTE(review): the other branches push the element's own name; this one
            // pushes "ServerResponse". Left unchanged - confirm whether that is intended.
            mElementStack.push("ServerResponse");
            mXmlReader.setContentHandler(handler);
        } else if (qName.equals("Request")) {
            mRequest = new RequestType();
            final DefaultHandler handler = new RequestHandler(mElementStack, mRequest, attributes, mXmlReader, this);
            mElementStack.push("Request");
            mXmlReader.setContentHandler(handler);
        } else if (qName.equals("Response")) {
            mResponse = new ResponseType();
            final DefaultHandler handler = new ResponseHandler(mElementStack, mResponse, attributes, mXmlReader, this);
            mElementStack.push("Response");
            mXmlReader.setContentHandler(handler);
        }
    }

    /** Intentionally empty: this handler does nothing when an element ends. */
    @Override
    public void endElement(java.lang.String uri, java.lang.String localName, java.lang.String qName) throws SAXException {}

    /**
     * Parses the given source with this object as the initial content handler.
     * Results and the element stack from any earlier parse are cleared first, so
     * a reused handler never returns an object left over from a previous document.
     *
     * @param is the XML input
     * @throws SAXException if the document cannot be parsed
     * @throws IOException if the input cannot be read
     */
    public void parse(InputSource is) throws SAXException, IOException {
        mCollection = null;
        mRequest = null;
        mResponse = null;
        mElementStack.clear();
        mXmlReader.setContentHandler(this);
        mXmlReader.parse(is);
    }

    /**
     * Parses the stream and returns the request found in it.
     *
     * @return the parsed {@link RequestType}, or {@code null} if the document had no {@code Request} element
     */
    public RequestType parseRequest(InputStream inStream) throws SAXException, IOException {
        final InputSource inSource = new InputSource(inStream);
        parse(inSource);
        return mRequest;
    }

    /**
     * Parses the stream and returns the collection found in it.
     *
     * @return the parsed {@link CollectionType}, or {@code null} if the document had no {@code Collection} element
     */
    public CollectionType parseCollection(InputStream inStream) throws SAXException, IOException {
        final InputSource inSource = new InputSource(inStream);
        parse(inSource);
        return mCollection;
    }

    /**
     * Parses the stream and returns the response found in it.
     *
     * @return the parsed {@link ResponseType}, or {@code null} if the document had no {@code Response} element
     */
    public ResponseType parseResponse(InputStream inStream) throws SAXException, IOException {
        final InputSource inSource = new InputSource(inStream);
        parse(inSource);
        return mResponse;
    }
}
然后我为每种类型扩展“DefaultHandler”,如下所示:
公共类 CollectionHandler 扩展 DefaultHandler { private final CharArrayWriter m_textBuffer = new CharArrayWriter (); 私有最终堆栈 m_elementStack; 私人最终 DefaultHandler m_parent; 私有最终 XMLReader m_parser;
private final CollectionType m_collection;
public CollectionHandler(Stack<String> path, CollectionType collection, Attributes attributes, XMLReader parser, DefaultHandler parent) throws SAXException{
m_elementStack = path;
m_collection = collection;
m_parser = parser;
m_parent = parent;
start(attributes);
}
private void start(Attributes attributes){}
@Override
public void startElement(java.lang.String uri, java.lang.String localName, java.lang.String qName, Attributes attributes) throws SAXException {
m_textBuffer.reset();
if (qName.equals("Identity")){
final IdentityType identity = new IdentityType();
m_collection.setIdentity(identity);
final DefaultHandler handler = new IdentityHandler(m_elementStack, identity, attributes, m_parser, this);
m_elementStack.push("Identity");
m_parser.setContentHandler(handler);
}
if (qName.equals("BackpackItem")){
final BackpackItemType backpackItem = new BackpackItemType();
m_collection.getBackpackItem().add(backpackItem);
final DefaultHandler handler = new BackpackItemHandler(m_elementStack, backpackItem, attributes, m_parser, this);
m_elementStack.push("BackpackItem");
m_parser.setContentHandler(handler);
}
else if (qName.equals("ItemReference")){
final ItemReferenceType itemReference = new ItemReferenceType();
m_collection.getItemReference().add(itemReference);
final DefaultHandler handler = new ItemReferenceHandler(m_elementStack, itemReference, attributes, m_parser, this);
m_elementStack.push("ItemReference");
m_parser.setContentHandler(handler);
}
else if (qName.equals("Property")){
final PropertyType property = new PropertyType();
m_collection.getProperty().add(property);
final DefaultHandler handler = new PropertyHandler(m_elementStack, property, attributes, m_parser, this);
m_elementStack.push("Property");
m_parser.setContentHandler(handler);
}
else if (qName.equals("Region")){
final RegionType toInsert = new RegionType();
m_collection.getRegion().add(toInsert);
final DefaultHandler handler = new RegionHandler(m_elementStack, toInsert, attributes, m_parser, this);
m_elementStack.push("Region");
m_parser.setContentHandler(handler);
}
else if (qName.equals("Spore")){
final SporeType toInsert = new SporeType();
m_collection.getSpore().add(toInsert);
final DefaultHandler handler = new SporeHandler(m_elementStack, toInsert, attributes, m_parser, this);
m_elementStack.push ("Spore");
m_parser.setContentHandler (handler);
}
}
@Override
public void endElement(java.lang.String uri, java.lang.String localName, java.lang.String qName) throws SAXException {
if (qName.equals("Collection")){
m_elementStack.pop();
m_parser.setContentHandler(m_parent);
}
}
}