0

我正在从 RSS 提要中获取“description”标签。但是从日志中可以看到,描述并不完整:它被截断了。而如果我把 url 粘贴到浏览器中,则会显示完整的描述。我也没有在代码中截断内容。可能是什么原因?如何才能获得完整的内容?

Once upon a time, a Guru was giving darshan to a congregation. People 
were coming and bowing down, seeking blessings. Gurudev was silent most of the 
time and when somebody would come and share their troubles, looking for a response, 
he would say only one thing. One person came to him and said, “I failed [...] 

以下是 feed 的网址:

http://srisriravishankar.org/feed/

谢谢

针对 asatanballa 的补充编辑(如何在 XML 解析器中检索“content:encoded”标签):

RssFeedStructure.java

/**
 * Mutable value holder for the fields read from a single feed entry.
 *
 * Every property is a plain field with a getter/setter pair; values are stored
 * and returned exactly as given, with no validation or transformation.
 */
public class RssFeedStructure {

// Textual fields of the entry.
private String title;
private String description;
private String content;
private String encodedContent;

// Date-like fields, kept as the raw strings handed to the setters.
private String pubDate;
private String published;

// Link-like fields.
private String link;
private String imgLink;
private URL url;

/**
 * @return the title, or {@code null} if none was set
 */
public String getTitle() {
    return this.title;
}

/**
 * @param title
 *            the title to store
 */
public void setTitle(final String title) {
    this.title = title;
}

/**
 * @return the url, or {@code null} if none was set
 */
public URL getUrl() {
    return this.url;
}

/**
 * @param url
 *            the url to store
 */
public void setUrl(final URL url) {
    this.url = url;
}

/**
 * @return the description, or {@code null} if none was set
 */
public String getDescription() {
    return this.description;
}

/**
 * Stores the description verbatim; no markup is stripped from it.
 *
 * @param description
 *            the description to store
 */
public void setDescription(final String description) {
    this.description = description;
}

/**
 * @return the publication date string, or {@code null} if none was set
 */
public String getPubDate() {
    return this.pubDate;
}

/**
 * @param pubDate
 *            the publication date string to store
 */
public void setPubDate(final String pubDate) {
    this.pubDate = pubDate;
}

/**
 * @return the encoded content, or {@code null} if none was set
 */
public String getEncodedContent() {
    return this.encodedContent;
}

/**
 * @param encodedContent
 *            the encoded content to store
 */
public void setEncodedContent(final String encodedContent) {
    this.encodedContent = encodedContent;
}

/**
 * @return the image link, or {@code null} if none was set
 */
public String getImgLink() {
    return this.imgLink;
}

/**
 * @param imgLink
 *            the image link to store
 */
public void setImgLink(final String imgLink) {
    this.imgLink = imgLink;
}

/**
 * @return the link, or {@code null} if none was set
 */
public String getLink() {
    return this.link;
}

/**
 * @param link
 *            the link to store
 */
public void setLink(final String link) {
    this.link = link;
}

/**
 * @return the content, or {@code null} if none was set
 */
public String getContent() {
    return this.content;
}

/**
 * @param content
 *            the content to store
 */
public void setContent(final String content) {
    this.content = content;
}

/**
 * @return the published string, or {@code null} if none was set
 */
public String getPublished() {
    return this.published;
}

/**
 * @param published
 *            the published string to store
 */
public void setPublished(final String published) {
    this.published = published;
}

}

XmlHandler.java

/**
 * SAX handler that turns the {@code item} elements of a feed into
 * {@link RssFeedStructure} objects.
 *
 * Element text is accumulated in {@link #chars} and copied into the pending
 * entry when the element closes. Elements are matched on their local name,
 * falling back to the qualified name with its prefix removed, so matching
 * works whether or not the parser performs namespace processing.
 *
 * Parsed entries and the article counter live in instance fields that are
 * never reset, so results accumulate if the same handler is used for more
 * than one call to {@link #getLatestArticles(String)}.
 */
public class XmlHandler extends DefaultHandler {

// Number of articles to download
private static final int ARTICLES_LIMIT = 55;

// Entry currently being filled; replaced by a fresh one after each item.
private RssFeedStructure feedStr = new RssFeedStructure();
private List<RssFeedStructure> rssList = new ArrayList<RssFeedStructure>();

private int articlesAdded = 0;

// Text of the element currently being read; reset on every start tag.
StringBuffer chars = new StringBuffer();

/**
 * Thrown from {@link #endElement} to abort parsing once
 * {@link #ARTICLES_LIMIT} items have been collected. It is a normal stop
 * condition, kept distinct from genuine parse errors.
 */
private static final class ArticleLimitReached extends SAXException {
    private static final long serialVersionUID = 1L;

    ArticleLimitReached() {
        super("article limit reached");
    }
}

/**
 * Returns the element name without any namespace prefix.
 *
 * @param localName local name reported by the parser; may be empty
 * @param qName     qualified name reported by the parser; may be empty
 * @return {@code localName} when non-empty, otherwise {@code qName} with
 *         everything up to and including the first ':' removed
 */
private static String elementName(String localName, String qName) {
    if (localName != null && localName.length() > 0) {
        return localName;
    }
    if (qName == null) {
        return "";
    }
    int colon = qName.indexOf(':');
    return colon >= 0 ? qName.substring(colon + 1) : qName;
}

/**
 * Resets the text buffer and, for an {@code enclosure} element, records its
 * {@code url} attribute as the image link of the pending entry. A missing
 * attribute or the literal text "null" is stored as an empty string.
 */
@Override
public void startElement(String uri, String localName, String qName,
        Attributes atts) {
    chars = new StringBuffer();

    if ("enclosure".equalsIgnoreCase(elementName(localName, qName))) {
        String enclosureUrl = atts.getValue("url");
        // getValue returns null when the attribute is absent.
        if (enclosureUrl == null || enclosureUrl.equalsIgnoreCase("null")) {
            feedStr.setImgLink("");
        } else {
            feedStr.setImgLink(enclosureUrl);
        }
    }
}

/**
 * Copies the accumulated text into the matching field of the pending entry
 * and, on {@code item}, appends the entry to the result list.
 *
 * @throws SAXException to stop parsing once {@link #ARTICLES_LIMIT} items
 *                      have been collected
 */
@Override
public void endElement(String uri, String localName, String qName)
        throws SAXException {
    String name = elementName(localName, qName);
    String text = chars.toString();

    if ("media:content".equalsIgnoreCase(qName)) {
        // Deliberately ignored; checked first so that it is not mistaken
        // for a plain "content" element once the prefix is stripped.
    } else if ("title".equalsIgnoreCase(name)) {
        feedStr.setTitle(text);
    } else if ("description".equalsIgnoreCase(name)) {
        feedStr.setDescription(text);
    } else if ("pubDate".equalsIgnoreCase(name)) {
        feedStr.setPubDate(text);
    } else if ("encoded".equalsIgnoreCase(name)) {
        // content:encoded
        feedStr.setEncodedContent(text);
    } else if ("published".equalsIgnoreCase(name)) {
        feedStr.setPublished(text);
    } else if ("content".equalsIgnoreCase(name)) {
        feedStr.setContent(text);
    } else if ("link".equalsIgnoreCase(name)) {
        feedStr.setLink(text);
    }

    if ("item".equalsIgnoreCase(name)) {
        rssList.add(feedStr);

        feedStr = new RssFeedStructure();
        articlesAdded++;
        if (articlesAdded >= ARTICLES_LIMIT) {
            throw new ArticleLimitReached();
        }
    }
}

/**
 * Appends a chunk of character data to the current element's text; the
 * parser may deliver one element's text in several chunks.
 */
@Override
public void characters(char ch[], int start, int length) {
    chars.append(ch, start, length);
}

/**
 * Downloads and parses the feed at {@code feedUrl} with this handler.
 *
 * Failures are logged and otherwise ignored, so the returned list then
 * holds whatever was parsed before the failure.
 *
 * @param feedUrl address of the feed
 * @return the list of entries collected by this handler so far
 */
public List<RssFeedStructure> getLatestArticles(String feedUrl) {
    java.io.InputStream in = null;
    try {
        // NOTE(review): the feed is remote input; consider disabling DTDs /
        // external entities if the target parser supports it.
        SAXParserFactory spf = SAXParserFactory.newInstance();
        SAXParser sp = spf.newSAXParser();
        XMLReader xr = sp.getXMLReader();
        xr.setContentHandler(this);
        in = new URL(feedUrl).openStream();
        xr.parse(new InputSource(in));
    } catch (ArticleLimitReached ignored) {
        // Expected: parsing was stopped on purpose at ARTICLES_LIMIT.
    } catch (IOException e) {
        logFailure(feedUrl, e);
    } catch (SAXException e) {
        logFailure(feedUrl, e);
    } catch (ParserConfigurationException e) {
        logFailure(feedUrl, e);
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing fails.
            }
        }
    }

    return rssList;
}

/** Reports a failed download or parse without interrupting the caller. */
private static void logFailure(String feedUrl, Exception e) {
    java.util.logging.Logger.getLogger(XmlHandler.class.getName()).log(
            java.util.logging.Level.WARNING,
            "Failed to read RSS feed " + feedUrl, e);
}

}
4

2 回答 2

0

尝试再添加一个 else 分支,用来查找“content:encoded”。根据我看到的标签,我认为仅用“content”是行不通的。

// Stores the accumulated element text as the encoded content when the
// element's local name equals "content:encoded" (case-insensitive).
// NOTE(review): SAX local names normally exclude the namespace prefix, so this
// comparison may never match; comparing qName instead is likely intended — confirm.
else if (localName.equalsIgnoreCase("content:encoded")) {
       feedStr.setEncodedContent(chars.toString());
}

-- 线程的原始答案 --

这通常由提要的生成者控制,而不是由作为提要消费者的你控制。在为博客配置 RSS 提要时,所有者可以决定是包含全文,还是只包含一部分内容作为摘要。有些提要所有者只提供摘要,是为了让您实际访问该站点,而不是只通过提要阅读内容。

于 2013-07-28T14:49:30.183 回答
0

刚刚知道 SAX Parser 不会从 RSS Feed 中获取诸如“content:encoded”之类的标签。您可以使用 DOM 解析器来代替。

于 2013-07-29T07:44:55.573 回答