0

给定一个 URL,如果它有任何 RSS 节点,那么我将添加到数据库中。

例如:

对于 RSS 提要的 URL,rssDoc.SelectNodes("rss/channel/item").Count 大于零。

但是对于 Atom 提要的 URL,rssDoc.SelectNodes("rss/channel/item").Count 等于零。

如何检查 Atom/RSS URL 是否有任何节点?我试过了 rssDoc.SelectNodes("feed/entry").Count,但得到的计数也是零。

''' <summary>
''' Downloads the document at <paramref name="url"/> and reports whether it is
''' an RSS or Atom feed that contains at least one item/entry.
''' </summary>
''' <param name="url">Address of the feed to probe.</param>
''' <returns>
''' True when the document has at least one rss/channel/item node or at least
''' one feed/entry node (in any namespace); False otherwise, including when the
''' URL is invalid, the request fails or times out, or the response is not XML.
''' </returns>
Public Shared Function HasRssItems(ByVal url As String) As Boolean
    ' Initialised to Nothing so the Finally block can tell whether a response
    ' was ever obtained (Create/GetResponse may throw before assignment).
    Dim myResponse As WebResponse = Nothing
    Try
        Dim myRequest As WebRequest = System.Net.WebRequest.Create(url)
        myRequest.Timeout = 5000
        myResponse = myRequest.GetResponse()

        Dim rssDoc As New XmlDocument()
        Using rssStream As Stream = myResponse.GetResponseStream()
            rssDoc.Load(rssStream)
        End Using

        ' RSS 2.0: elements are not namespaced, so a plain path works.
        If rssDoc.SelectNodes("rss/channel/item").Count > 0 Then
            Return True
        End If

        ' Atom: every element lives in a default namespace, so a plain
        ' "feed/entry" path never matches. Matching on local-name() covers
        ' both the Atom 0.3 and Atom 1.0 namespaces without a namespace manager.
        Return rssDoc.SelectNodes("/*[local-name()='feed']/*[local-name()='entry']").Count > 0
    Catch ex As Exception
        ' Best-effort probe: any failure simply means "no usable feed here".
        Return False
    Finally
        If myResponse IsNot Nothing Then
            myResponse.Close()
        End If
    End Try
End Function

4

1 回答 1

1

您的主要问题是这一行的 XML“节点路径”:

Return rssDoc.SelectNodes("rss/channel/item").Count > 0

仅对RSS 提要有效,对ATOM 提要无效。

我过去解决这个问题的一种方法是使用一个简单的函数将 ATOM 提要转换为 RSS 提要。当然,您也可以反过来转换,或者根本不转换;但是,转换为单一格式使您能够编写一个“通用”代码块,用它来提取提要项目中您可能感兴趣的各种元素(即日期、标题等)。

Code Project 上有一篇ATOM to RSS Converter 文章提供了这种转换,但是,它是用 C# 编写的。我之前自己手动将其转换为 VB.NET,所以这里是 VB.NET 版本:

''' <summary>
''' Converts an Atom feed (v0.3 or v1.0) into an equivalent RSS 2.0 document.
''' </summary>
''' <param name="atomDoc">The loaded Atom feed. It is only queried, never modified.</param>
''' <returns>
''' A new XmlDocument with an rss/channel root containing title, link,
''' description, language and generator elements, followed by one item
''' (title, link, pubDate, author, description) per Atom entry.
''' </returns>
''' <exception cref="ArgumentNullException"><paramref name="atomDoc"/> is Nothing.</exception>
Private Function AtomToRssConverter(ByVal atomDoc As XmlDocument) As XmlDocument
    If atomDoc Is Nothing Then
        Throw New ArgumentNullException("atomDoc")
    End If

    Const rssVersion As String = "2.0"
    Const rssLanguage As String = "en-US"
    Const rssGenerator As String = "RDFFeedConverter"
    ' RFC 822 style date used by RSS 2.0; 'GMT' is quoted so it is emitted literally.
    Const rssDateFormat As String = "ddd, dd MMM yyyy HH:mm:ss 'GMT'"
    ' RSS dates must use English day/month names regardless of the machine's locale.
    Dim invariant As System.Globalization.CultureInfo = System.Globalization.CultureInfo.InvariantCulture

    Dim xmlDoc As XmlDocument = atomDoc
    Dim xmlNode As XmlNode = Nothing
    Dim mgr As New XmlNamespaceManager(xmlDoc.NameTable)
    mgr.AddNamespace("atom", "http://purl.org/atom/ns#")
    If xmlDoc.SelectSingleNode("//atom:title", mgr) Is Nothing Then
        ' This looks like an ATOM v1.0 format, rather than ATOM v0.3.
        mgr.RemoveNamespace("atom", "http://purl.org/atom/ns#")
        mgr.AddNamespace("atom", "http://www.w3.org/2005/Atom")
    End If

    ' ---- Feed-level (channel) values ----
    Dim feedTitle As String = ""
    Dim feedLink As String = ""
    Dim rssDescription As String = ""

    xmlNode = xmlDoc.SelectSingleNode("//atom:title", mgr)
    If xmlNode IsNot Nothing Then
        feedTitle = xmlNode.InnerText
    End If

    ' Prefer the feed's own "alternate" link (the human-readable site URL);
    ' otherwise fall back to the first link found anywhere in the document.
    xmlNode = xmlDoc.SelectSingleNode("/atom:feed/atom:link[@rel='alternate']/@href", mgr)
    If xmlNode Is Nothing Then
        xmlNode = xmlDoc.SelectSingleNode("//atom:link/@href", mgr)
    End If
    If xmlNode IsNot Nothing Then
        feedLink = xmlNode.InnerText
    End If

    ' Atom 0.3 calls the description "tagline", Atom 1.0 calls it "subtitle";
    ' when both are present the subtitle wins.
    xmlNode = xmlDoc.SelectSingleNode("//atom:tagline", mgr)
    If xmlNode IsNot Nothing Then
        rssDescription = xmlNode.InnerText
    End If
    xmlNode = xmlDoc.SelectSingleNode("//atom:subtitle", mgr)
    If xmlNode IsNot Nothing Then
        rssDescription = xmlNode.InnerText
    End If

    ' Write into a string so no byte-encoding round trip is needed before LoadXml.
    Dim rssText As New StringWriter()
    Using xmlWriter As New XmlTextWriter(rssText)
        xmlWriter.Formatting = Formatting.Indented

        xmlWriter.WriteStartElement("rss")
        xmlWriter.WriteAttributeString("version", rssVersion)
        xmlWriter.WriteStartElement("channel")
        xmlWriter.WriteElementString("title", feedTitle)
        xmlWriter.WriteElementString("link", feedLink)
        xmlWriter.WriteElementString("description", rssDescription)
        xmlWriter.WriteElementString("language", rssLanguage)
        xmlWriter.WriteElementString("generator", rssGenerator)

        ' ---- One RSS item per Atom entry ----
        For Each entry As XmlNode In xmlDoc.SelectNodes("//atom:entry", mgr)
            ' Declared inside the loop so a value missing from this entry is
            ' written as empty instead of inheriting the previous entry's value.
            Dim title As String = String.Empty
            Dim link As String = String.Empty
            Dim description As String = String.Empty
            Dim author As String = String.Empty
            Dim pubDate As String = String.Empty

            xmlNode = entry.SelectSingleNode("atom:title", mgr)
            If xmlNode IsNot Nothing Then
                title = xmlNode.InnerText
            End If

            ' Prefer the "alternate" link; otherwise take the entry's first link.
            xmlNode = entry.SelectSingleNode("atom:link[@rel='alternate']/@href", mgr)
            If xmlNode Is Nothing Then
                xmlNode = entry.SelectSingleNode("atom:link/@href", mgr)
            End If
            If xmlNode IsNot Nothing Then
                link = xmlNode.InnerText
            End If

            ' Full content when available, otherwise the entry summary.
            xmlNode = entry.SelectSingleNode("atom:content", mgr)
            If xmlNode Is Nothing Then
                xmlNode = entry.SelectSingleNode("atom:summary", mgr)
            End If
            If xmlNode IsNot Nothing Then
                description = xmlNode.InnerText
            End If

            ' The entry's own author first; if it has none, fall back to the
            ' first author name in the document (normally the feed-level author).
            xmlNode = entry.SelectSingleNode("atom:author/atom:name", mgr)
            If xmlNode Is Nothing Then
                xmlNode = xmlDoc.SelectSingleNode("//atom:name", mgr)
            End If
            If xmlNode IsNot Nothing Then
                author = xmlNode.InnerText
            End If

            ' "issued" is the Atom 0.3 name; "updated" overrides it when present.
            xmlNode = entry.SelectSingleNode("atom:issued", mgr)
            If xmlNode IsNot Nothing Then
                pubDate = xmlNode.InnerText
            End If
            xmlNode = entry.SelectSingleNode("atom:updated", mgr)
            If xmlNode IsNot Nothing Then
                pubDate = xmlNode.InnerText
            End If

            ' A missing or unparsable date becomes Date.MinValue rather than
            ' aborting the whole conversion. Timestamps carrying an offset are
            ' converted to UTC; those without one are treated as local time.
            Dim published As DateTime = DateTime.MinValue
            If pubDate.Length > 0 Then
                DateTime.TryParse(pubDate, invariant, _
                                  System.Globalization.DateTimeStyles.AdjustToUniversal Or _
                                  System.Globalization.DateTimeStyles.AssumeLocal, _
                                  published)
            End If

            xmlWriter.WriteStartElement("item")
            xmlWriter.WriteElementString("title", title)
            xmlWriter.WriteElementString("link", link)
            xmlWriter.WriteElementString("pubDate", published.ToString(rssDateFormat, invariant))
            xmlWriter.WriteElementString("author", author)
            xmlWriter.WriteElementString("description", description)
            xmlWriter.WriteEndElement()
        Next

        xmlWriter.WriteEndElement() ' channel
        xmlWriter.WriteEndElement() ' rss
        xmlWriter.Flush()
    End Using

    Dim retDoc As New XmlDocument()
    retDoc.LoadXml(rssText.ToString())
    Return retDoc
End Function

用法相当简单。只需将您的 ATOM 提要加载到一个 XmlDocument 对象中并将其传递给此函数,您就会得到一个 RSS 格式的 XmlDocument 对象!

如果你有兴趣,我已经在 pastebin.com 上放了一个完整的 RSSReader 类

于 2010-01-07T15:41:05.827 回答