3

我正在尝试解析如下所示的 XML 文件:

<?xml version="1.0" encoding="utf-8"?>
<downloaddata>
    <downloaditem itemid="1">
    <title>Abdul kalaam Inspirational Talk</title>
    <downloadlink>http://o-o.preferred.spectranet-blr1.v8.lscache4.c.youtube.com/videoplayback?upn=Rxb-DvFeBTE&sparams=cp%2Cid%2Cip%2Cipbits%2Citag%2Cratebypass%2Csource%2Cupn%2Cexpire&fexp=906512%2C907217%2C907335%2C921602%2C919306%2C919316%2C904455%2C919324%2C904452&itag=18&ip=203.0.0.0&signature=96D7FA17DF684B4C2CD30F12251F3263C83EC443.05F62E98E1059BB44459ABF319F50DC4B7E6D90E&sver=3&ratebypass=yes&source=youtube&expire=1337691481&key=yt1&ipbits=8&cp=U0hSTFZUT19NS0NOMl9OTlNFOmlwaTFSSGFfd3NK&id=67ffa1d50864f57d&title=Abdul%20Kalam%20inspirational%20Speech%20on%20Leadership%20and%20Motivation</downloadlink>
    </downloaditem>
</downloaddata>

当 downloadlink 标签中的数据如上所示时,解析似乎会失败。我尝试用相同长度的其他内容替换这段数据,结果就能正常解析。

下面是我正在使用的安卓代码。

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import android.os.Environment;

/**
 * SAX handler that reads {@code downloadinfo1.xml} from external storage and collects one
 * {@link VideoDetails} per {@code <downloaditem>} element into {@link #downloadList}.
 *
 * <p>Note: the input must be well-formed XML. Characters such as {@code &} inside element text
 * must be escaped (e.g. {@code &amp;}) or wrapped in a CDATA section, otherwise the parser
 * raises a {@link SAXException} before any {@code endElement} callback is delivered.
 */
public class Wilxmlparser extends DefaultHandler {

    /** Parsed items; initialised eagerly so {@link #endElement} can always add to it. */
    List<VideoDetails> downloadList = new ArrayList<VideoDetails>();

    /**
     * Text accumulated for the element currently being read. A buffer is required because
     * the parser may deliver the text of a single element through several
     * {@link #characters} calls.
     */
    private final StringBuilder tempVal = new StringBuilder();

    /** The {@code <downloaditem>} currently being populated, or {@code null} outside one. */
    private VideoDetails tempVidDet;

    public Wilxmlparser() {
    }

    /**
     * Parses {@code /watchitlater/config/downloadinfo1.xml} under the external storage
     * directory, registering this object as the SAX callback handler. Parse, configuration
     * and I/O failures are reported via {@code printStackTrace()} and otherwise ignored.
     */
    public void parseXML() {
        // get a factory
        SAXParserFactory spf = SAXParserFactory.newInstance();
        try {
            // get a new instance of parser
            SAXParser sp = spf.newSAXParser();

            File downloadInfo = new File(
                    Environment.getExternalStorageDirectory() + "/watchitlater/config/downloadinfo1.xml");
            // parse the file and also register this class for call backs
            sp.parse(downloadInfo, this);
        } catch (SAXException se) {
            se.printStackTrace();
        } catch (ParserConfigurationException pce) {
            pce.printStackTrace();
        } catch (IOException ie) {
            ie.printStackTrace();
        }
    }

    // Event Handlers

    /**
     * Clears the text buffer for the new element and, for {@code <downloaditem>}, starts a
     * fresh {@link VideoDetails} whose id is taken from the {@code itemid} attribute.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        // reset the buffer so text from a previous element does not leak into this one
        tempVal.setLength(0);
        if (qName.equalsIgnoreCase("downloaditem")) {
            tempVidDet = new VideoDetails();
            tempVidDet.setItemId(Integer.parseInt(attributes.getValue("itemid")));
        }
    }

    /**
     * Appends the delivered chunk to the buffer. Appending (rather than overwriting) is
     * essential: the parser is allowed to split one element's text across multiple calls.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        tempVal.append(ch, start, length);
    }

    /**
     * Stores the accumulated text on the current item for {@code <title>} and
     * {@code <downloadlink>}, and adds the finished item to {@link #downloadList} when
     * {@code <downloaditem>} closes.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (tempVidDet == null) {
            // element closed outside any <downloaditem>; nothing to populate
            return;
        }
        if (qName.equalsIgnoreCase("downloaditem")) {
            downloadList.add(tempVidDet);
            // the item is complete; later stray elements must not modify it
            tempVidDet = null;
        } else if (qName.equalsIgnoreCase("title")) {
            tempVidDet.setTitle(tempVal.toString());
        } else if (qName.equalsIgnoreCase("downloadlink")) {
            tempVidDet.setDownloadLink(tempVal.toString());
        }
    }
}

对于上面的 XML 文件,上述代码收不到 endElement 回调。但是,如果 XML 是下面这样:

<?xml version="1.0" encoding="utf-8"?>
<downloaddata>
    <downloaditem itemid="1">
        <title>Abdul kalaam Inspirational Talk</title>
        <downloadlink>http://www.gmail.com/hello/world/sdfsdf%20.@@%!@#    ($dwe</downloadlink>
    </downloaditem>
</downloaddata>

或者

<?xml version="1.0" encoding="utf-8"?>
<downloaddata>
    <downloaditem itemid="1">
        <title>Abdul kalaam Inspirational Talk</title>
            <downloadlink>httphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttphttpa</downloadlink>
    </downloaditem>
</downloaddata>

那么它就能正常工作。我究竟做错了什么?

4

2 回答 2

1

您的解析器之所以无法解析有问题的 XML,是因为它不是合法的 XML。导致问题的那段数据中包含必须转义的字符(例如 URL 中的 & 必须写成 &amp;)。更多信息请参阅维基百科 XML 条目中的“字符与转义”一节。

这个问题最好在生成该 XML 的程序中修正;最简单的解决方法是把有问题的文本包裹在 CDATA 部分中。

数据修复之后,您可能还会遇到由解析代码中的一个误解引起的问题。

@Override
public void characters(char[] ch, int start, int length) throws SAXException {
   // Replaces tempVal with only the chunk passed to this call; text from any earlier call is discarded.
   tempVal = new String(ch,start,length);
}

这段代码并不总能获取开始标记和结束标记之间的全部字符,因为该方法的契约允许解析器对同一段文本多次调用它。您需要把字符追加到一个字符串缓冲区中(该缓冲区在 startElement 方法中初始化,在 endElement 方法中使用),而不是简单地复制到一个字符串里。

关于 characters 方法的这个解析问题,更多信息请参阅我对另一个 SO 问题的回答。

于 2012-05-22T14:05:18.867 回答
1

解析器无法解析未转义的特殊字符。如果下面这段文本中存在特殊字符:

块引用

http://o-o.preferred.spectranet-blr1.v8.lscache4.c.youtube.com/videoplayback?upn=Rxb-DvFeBTE&sparams=cp%2Cid%2Cip%2Cipbits%2Citag%2Cratebypass%2Csource%2Cupn%2Cexpire&fexp=906512%2C907217%2C907335%2C921602%2C919306%2C919316%2C904455%2C919324%2C904452&itag=18&ip=203.0.0.0&signature=96D7FA17DF684B4C2CD30F12251F3263C83EC443.05F62E98E1059BB44459ABF319F50DC4B7E6D90E&sver=3&ratebypass=yes&source=youtube&expire=1337691481&key=yt1&ipbits=8&cp=U0hSTFZUT19NS0NOMl9OTlNFOmlwaTFSSGFfd3NK&id=67ffa1d50864f57d&title=Abdul%20Kalam%20inspirational%20Speech%20on%20Leadership%20and%20Motivation

块引用

您可以先把这段文本传给 TextUtils.htmlEncode(string),然后再开始解析,我认为这样可行。或者在服务器端做修改,让它返回使用 UTF-8 字符集编码的数据,然后在设备端用相同的字符集解码。

于 2012-05-22T11:33:59.113 回答