0

我解析了一些 RSS 提要(尝试了不同的提要......)并且每次都会随机删除一些字符。我究竟做错了什么?为什么它在某些情况下有效而在其他情况下却无效?还有另一种方法吗?XML 将(在大多数情况下)包括 UTF-8 字符(如 ä、ö、ü 等),因此该解决方案也应该适用于这些字符。

如果您需要更多信息(更多代码、更详细信息等),请告诉我!

这是我的代码:

public class RSSHandler extends DefaultHandler {

final int state_unknown = 0;
final int state_title = 1;
final int state_description = 2;
final int state_link = 3;
final int state_pubdate = 4;
int currentState = state_unknown;
StringBuilder strCharacters;

RSSFeed feed;
RSSItem item;

boolean inEntity = false;
String entityName = "";

boolean itemFound = false;

public RSSHandler() {
    strCharacters = new StringBuilder();
}

public RSSFeed getFeed() {
    return feed;
}

@Override
public void startDocument() throws SAXException {
    feed = new RSSFeed();
    item = new RSSItem();
}

@Override
public void endDocument() throws SAXException {
}

@Override
public void startElement(String uri, String localName, String qName,
        Attributes attributes) throws SAXException {
    strCharacters = new StringBuilder();
    if (localName.equalsIgnoreCase("item")) {
        itemFound = true;
        item = new RSSItem();
        currentState = state_unknown;
    } else if (localName.equalsIgnoreCase("title")) {
        currentState = state_title;
    } else if (localName.equalsIgnoreCase("description")) {
        currentState = state_description;
    } else if (localName.equalsIgnoreCase("link")) {
        currentState = state_link;
    } else if (localName.equalsIgnoreCase("pubdate")) {
        currentState = state_pubdate;
    } else {
        currentState = state_unknown;
    }

}

@Override
public void endElement(String uri, String localName, String qName)
        throws SAXException {
    if (itemFound == true) {
        switch (currentState) {
            case state_title:
                item.setTitle(strCharacters.toString());
                break;
            case state_description:
                break;
            case state_link:
                item.setLink(strCharacters.toString());
                break;
            case state_pubdate:
                String dateStr = strCharacters.toString();
                SimpleDateFormat curFormater = new SimpleDateFormat(
                        "EEE, dd MMM yyyy HH:mm:ss Z", Locale.ENGLISH);
                Date dateObj = null;
                try {
                    dateObj = curFormater.parse(dateStr);
                    SimpleDateFormat postFormater = new SimpleDateFormat(
                            "dd.MM.yyyy HH:mm");
                    String newDateStr = postFormater.format(dateObj);
                    item.setPubdate(newDateStr);
                } catch (ParseException e) {
                    e.printStackTrace();
                }
                break;
            default:
                break;
        }
    } else {
        switch (currentState) {
            case state_title:
                feed.setTitle(strCharacters.toString());
                break;
            case state_description:
                break;
            case state_link:
                feed.setLink(strCharacters.toString());
                break;
            case state_pubdate:
                feed.setPubdate(strCharacters.toString());
                break;
            default:
                break;
        }
    }

    currentState = state_unknown;

    if (localName.equalsIgnoreCase("item")) {
        feed.addItem(item);
    }
}

public void startEntity(String name) throws SAXException {
    inEntity = true;
    entityName = name;
}

@Override
public void characters(char[] ch, int start, int length)
        throws SAXException {

    strCharacters = new StringBuilder();
    if (inEntity) {
        inEntity = false;
        strCharacters.append("&" + entityName + ";");
    } else {
        for (int i = start; i < start + length; i++) {
            strCharacters.append(ch[i]);
        }
    }

    // strCharacters.append(ch, start, length);
}

}

4

1 回答 1

0

您正在为StringBuilder每次characters()通话创建一个新的。这是不正确的。每个元素有很多次调用characters()——您需要连接所有这些结果,而不仅仅是收集最后一个。

于 2012-06-04T10:53:34.550 回答