3

我正在自学 F#(这是为了好玩,不是为了工作/学校),我正在尝试编写一个简单的解析器来计算 Windows Phone 应用程序跨多个市场的评论数量。毫无疑问,到目前为止我的代码很丑陋,但我正在努力改进它并遵循函数式编程范式。因为我来自 C、C++、C# 世界,所以很难。

  • 来自 C 世界,我习惯使用 null 值。我知道函数式编程/F# 不鼓励使用 null,但我想不出不使用它的方法。例如,在函数 parse 中有一个 null 检查。我怎样才能避免这样做?

  • 现在我的代码只计算第一页上的评论数量,但一个应用程序可能有超过 10 条评论,因此会有多个页面。如何(在函数 downloadReviews 或 parse 中)递归地浏览所有页面?

  • 我们如何将这段代码扩展为完全异步的?

以下是我到目前为止的代码。除了上述问题之外,我真的很想有人可以帮助我并指导我如何改进我的代码的整体结构。

open System
open System.IO
open System.Xml
open System.Xml.Linq
open Printf

/// Outcome of one page download attempt.
type DownloadPageResult = {
    /// Address that was requested.
    Uri: System.Uri;
    /// True when the download raised an exception; Source is then the empty string.
    ErrorOccured: bool;
    /// Response body as returned by WebClient.DownloadString.
    Source: string;
}

/// Review information extracted from a single downloaded page.
type ReviewData = {
    /// Address of the page these numbers were read from.
    CurrentPageUri: System.Uri;
    /// Address of the following page; null when the download failed or no "next" link was found.
    NextPageUri: System.Uri;
    /// Number of Atom "entry" elements on the page (0 when the download failed).
    NumberOfReviews: int;
}

/// Builds the addresses of the marketplace review feed.
module ReviewUrl = 
    /// Turns a server-relative path into an absolute service URI.
    /// Leading slashes are stripped so that "/v8/..." and "v8/..." both map to
    /// ".../v8/..." rather than ".../​/v8/..."; a null path yields the service root.
    let getBaseUri (path: string) =
        let relative = if String.IsNullOrEmpty path then String.Empty else path.TrimStart('/')
        new Uri(sprintf "http://cdn.marketplaceedgeservice.windowsphone.com/%s" relative)

    /// Address of the first review page (10 reviews per page) of application
    /// `appId` for the given country code and locale.
    let getUri country locale appId =
        getBaseUri(sprintf "/v8/ratings/product/%s/reviews?os=8.0.0.0&cc=%s&oc=&lang=%s&hw=520170499&dm=Test&chunksize=10" appId country locale)

/// Downloads the document at `uri` as a string.
/// Never raises: any exception is reported on stderr and turned into a result
/// with ErrorOccured = true and an empty Source.
let downloadPage (uri: System.Uri) =
    try
        use webClient = new System.Net.WebClient()
        printfn "%s" (uri.ToString())
        webClient.Headers.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
        // No Accept-Encoding header on purpose: WebClient does not decompress
        // responses, so inviting a compressed body would make DownloadString
        // return bytes that are not parseable XML.
        webClient.Headers.Add("Accept-Language", "en-US,en;q=0.8,fr;q=0.6")
        webClient.Headers.Add("User-Agent", "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1482.0 Safari/537.36")
        { Uri = uri; Source = webClient.DownloadString(uri); ErrorOccured = false }
    with error ->
        // Keep the best-effort contract, but do not hide why the download failed.
        eprintfn "Download of %O failed: %s" uri error.Message
        { Uri = uri; Source = String.Empty; ErrorOccured = true }

/// Downloads the first review page of application `appId` for the given
/// country code and locale.
let downloadReview country locale appId =
    ReviewUrl.getUri country locale appId
    |> downloadPage

/// Extracts the review count and the address of the next page from a
/// downloaded Atom feed page.
/// A failed download yields 0 reviews and a null NextPageUri; NextPageUri is
/// also null when the page has no usable "next" link.
/// XDocument.Load raises if Source is not well-formed XML.
let parse(pageResult: DownloadPageResult) =
    if pageResult.ErrorOccured then { CurrentPageUri = pageResult.Uri; NextPageUri = null; NumberOfReviews = 0 }
    else 
        // 'use' disposes the reader once parsing is finished.
        use reader = new StringReader(pageResult.Source)
        let doc = XDocument.Load(reader)
        let ns = XNamespace.Get("http://www.w3.org/2005/Atom")

        // Attribute lookup that tolerates a missing attribute instead of
        // dereferencing the null returned by XElement.Attribute.
        let attributeValue name (element: XElement) =
            match element.Attribute(XName.Get(name)) with
            | null -> None
            | attr -> Some attr.Value

        // Atom carries the link target in the 'href' attribute; the element
        // text is only used as a fallback when 'href' is absent.
        let nextUrl =
            doc.Descendants(ns + "link")
            |> Seq.tryFind (fun link -> attributeValue "rel" link = Some "next")
            |> Option.map (fun link ->
                match attributeValue "href" link with
                | Some href -> href
                | None -> link.Value)

        let nextPageUri =
            match nextUrl with
            | Some url when not (String.IsNullOrEmpty url) -> ReviewUrl.getBaseUri(url)
            | _ -> null

        { CurrentPageUri = pageResult.Uri
          NextPageUri = nextPageUri
          NumberOfReviews = doc.Descendants(ns + "entry") |> Seq.length }

/// Downloads and parses the first review page of the hard-coded application
/// for `locale`. The country code is whatever follows the first three
/// characters of the locale (e.g. "US" in "en-US").
let downloadReviews(locale: string) =
    let country = locale.Substring(3)
    "4e08377c-1240-4f80-9c35-0bacde2c66b6"
    |> downloadReview country locale
    |> parse


/// Program entry point: fetches the review data for every locale and prints
/// the resulting array. Command-line arguments are ignored.
[<EntryPoint>]
let main argv = 
    [| "en-US"; "en-GB" |]
    |> Array.map downloadReviews
    |> printfn "%A"
    0
4

3 回答 3

10

我更多地解决了这个问题,并尝试使用 XML 类型提供程序和 F# Data 的其他功能。这不是完整的代码,但它应该足以给你一个想法(并表明类型提供程序非常好:-)):

首先,我需要添加一些引用:

#r "System.Xml.Linq.dll"
#r "FSharp.Data.dll"
open FSharp.Data
open FSharp.Net

接下来,我编写了以下代码来下载一个示例页面。

// Downloads one sample review page (US store, en-US) with a synchronous request.
let data =
  let reviewsUrl =
    "http://cdn.marketplaceedgeservice.windowsphone.com//v8/ratings/product/4e08377c-1240-4f80-9c35-0bacde2c66b6/reviews"
  let parameters =
    [ "os", "8.0.0.0"
      "cc", "US"
      "lang", "en-US"
      "hw", "520170499"
      "dm", "Test"
      "chunksize", "10" ]
  Http.Request(reviewsUrl, query=parameters, headers=["User-Agent", "F#"])

我将示例另存为D:\temp\appstore.xml,然后使用 XML 类型提供程序来获得一个很好的类型来解析页面:

// Type generated by the XML type provider from the sample document saved at D:\temp\appstore.xml.
type PageDocument = XmlProvider< @"D:\temp\appstore.xml" >

然后您可以像这样下载和解析页面(这显示了如何获取评论数量和有关下一个链接的信息):

// Asynchronously downloads the first review page of `appId` for `locale` and
// returns (number of review entries, Some href of the "next" link or None).
let parseAsync (locale:string) appId = async {
  // Everything after the first three characters of the locale ("US" in "en-US")
  let country = locale.Substring(3)

  let reviewsUrl =
    "http://cdn.marketplaceedgeservice.windowsphone.com//v8/ratings/product/" 
      + appId + "/reviews"
  let parameters =
    [ "os", "8.0.0.0"; "cc", country; "lang", locale; 
      "hw", "520170499"; "dm", "Test"; "chunksize", "10" ]

  // Issue the request without blocking the calling thread
  let! data =
    Http.AsyncRequest(reviewsUrl, query=parameters, headers=["User-Agent", "F#"])

  // Typed view over the response, courtesy of the type provider
  let page = PageDocument.Parse(data)

  // GetLinks() lists every link (each with 'Rel' and 'Href'); GetEntries()
  // lists the review entries. Keep the Href of the first "next" link, if any.
  let nextLink = 
    page.GetLinks() 
    |> Seq.tryPick (fun link -> if link.Rel = "next" then Some link.Href else None)
  return (page.GetEntries().Length, nextLink) }
于 2013-06-04T02:39:17.763 回答
2

使代码异步的一般模式是:(在调用树中的某处)找到开销较大的 I/O 操作,然后从那里“向上”,把所有使用它的代码也改成异步,直到到达需要阻塞等待结果的位置。

在您的示例中,最底层的操作是下载,因此您应该先把 downloadPage 改为异步:

// Asynchronous download of `uri`. Any exception raised while downloading is
// converted into a result with ErrorOccured = true and an empty Source.
let downloadPage (uri: System.Uri) = async {
    let fetch = async {
        use client = new System.Net.WebClient()
        printfn "%s" (uri.ToString())
        // (Headers omitted)
        return! client.AsyncDownloadString(uri) }
    let! outcome = Async.Catch fetch
    match outcome with
    | Choice1Of2 source ->
        return { Uri = uri; Source = source; ErrorOccured = false }
    | Choice2Of2 _ ->
        return { Uri = uri; Source = String.Empty; ErrorOccured = true } }

您需要将代码包装在 async { ... } 中,使用 let! 调用 DownloadString 的异步版本,并(在两个分支中)使用 return 返回结果。

然后你需要把 downloadReview 和 downloadReviews 这样的函数也改成异步的(同样,将它们包装在 async 块中,并使用 let! 或 return! 调用 downloadPage 等其他异步操作)。

最后,如果您正在编写控制台应用程序,则需要阻止,但您可以并行运行不同语言环境的下载。假设downloadReviews是异步的:

let locales = [| "en-US"; "en-GB" |]
// One asynchronous computation per locale, combined into a single parallel
// computation that is then run to completion (blocking the caller).
let results = 
  Array.map downloadReviews locales
  |> Async.Parallel
  |> Async.RunSynchronously

至于其他问题,我认为在上面的示例中使用 null 可能没问题(您调用的 LINQ 查询会返回 null,因此没有简单的方法可以避免)。实际上可以改用 option 类型,但有点棘手——如果您有兴趣,请参阅此代码段。

此外,您可以使用 F# Data 库中的 Http.AsyncRequest 方法,它为您提供了一种更简单的方式来构造复杂的 HTTP 请求(不过我是该库的贡献者之一,所以我有偏见!)

于 2013-06-04T00:54:56.767 回答
2

正如 Tomas 所说,创建一个基于 async 的 DownloadString 版本(或者直接使用他的 FSharp.Data 库来处理)会更“函数式”。

您还可以将 FSharp.Data 与 ExtCore 结合使用,以利用 ExtCore 中的 asyncMaybe 和 asyncChoice 工作流。这些工作流在普通的 async 工作流之上提供了非常易于使用的错误处理。

无论如何,我花了几分钟清理你的代码。这并不多,但它确实在几个地方简化了你的代码:

open System
open System.IO
open System.Xml
open System.Xml.Linq
open Printf

/// Outcome of one page download attempt.
type DownloadPageResult = {
    /// Address that was requested.
    Uri : System.Uri;
    /// True when the download raised an exception; Source is then the empty string.
    ErrorOccured : bool;
    /// Response body as returned by WebClient.DownloadString.
    Source : string;
}

/// Review information extracted from a single downloaded page.
type ReviewData = {
    /// Address of the page these numbers were read from.
    CurrentPageUri : System.Uri;
    /// Address of the following page; None when the download failed or no "next" link was found.
    NextPageUri : System.Uri option;
    /// Number of Atom "entry" elements on the page (0u when the download failed).
    NumberOfReviews : uint32;
}

/// Builds the addresses of the marketplace review feed.
module ReviewUrl =
    /// Absolute root of the marketplace service; relative paths are resolved against it.
    let baseUri = Uri ("http://cdn.marketplaceedgeservice.windowsphone.com/", UriKind.Absolute)

    /// Address of the first review page (10 reviews per page) of application
    /// `appId` for the given country code and locale.
    let getUri country locale (appId : System.Guid) =
        let relative =
            sprintf "/v8/ratings/product/%s/reviews?os=8.0.0.0&cc=%s&oc=&lang=%s&hw=520170499&dm=Test&chunksize=10"
                (appId.ToString "D") country locale
        Uri (baseUri, relative)

/// Downloads the document at `uri` as a string.
/// Never raises: any exception is reported on stderr and turned into a result
/// with ErrorOccured = true and an empty Source.
let downloadPage (uri : System.Uri) =
    try
        use webClient = new System.Net.WebClient()
        printfn "%s" (uri.ToString())
        webClient.Headers.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
        // No Accept-Encoding header on purpose: WebClient does not decompress
        // responses, so inviting a compressed body would make DownloadString
        // return bytes that are not parseable XML.
        webClient.Headers.Add("Accept-Language", "en-US,en;q=0.8,fr;q=0.6")
        webClient.Headers.Add("User-Agent", "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1482.0 Safari/537.36")
        { Uri = uri; Source = webClient.DownloadString uri; ErrorOccured = false }
    with error ->
        // Keep the best-effort contract, but do not hide why the download failed.
        eprintfn "Download of %O failed: %s" uri error.Message
        { Uri = uri; Source = String.Empty; ErrorOccured = true }


/// Extracts the review count and the address of the next page from a
/// downloaded Atom feed page.
/// A failed download yields 0 reviews and no next page; NextPageUri is also
/// None when the page has no usable "next" link.
/// XDocument.Load raises if Source is not well-formed XML.
let parse (pageResult : DownloadPageResult) =
    if pageResult.ErrorOccured then
        { CurrentPageUri = pageResult.Uri; NextPageUri = None; NumberOfReviews = 0u }
    else 
        use reader = new StringReader (pageResult.Source)
        let doc = XDocument.Load reader
        let ns = XNamespace.Get "http://www.w3.org/2005/Atom"

        // Attribute lookup that tolerates a missing attribute instead of
        // dereferencing the null returned by XElement.Attribute.
        let attributeValue name (element : XElement) =
            match element.Attribute (XName.Get name) with
            | null -> None
            | attr -> Some attr.Value

        // Atom carries the link target in the 'href' attribute; the element
        // text is only used as a fallback when 'href' is absent.
        let nextUrl =
            doc.Descendants (ns + "link")
            |> Seq.tryFind (fun link -> attributeValue "rel" link = Some "next")
            |> Option.map (fun link ->
                match attributeValue "href" link with
                | Some href -> href
                | None -> link.Value)

        let nextPageUri =
            match nextUrl with
            | Some url when not (System.String.IsNullOrEmpty url) ->
                Some (Uri (ReviewUrl.baseUri, url))
            | _ -> None

        {   CurrentPageUri = pageResult.Uri;
            NextPageUri = nextPageUri;
            NumberOfReviews =
                doc.Descendants (ns + "entry") |> Seq.length |> uint32; }

/// Downloads and parses the first review page of the hard-coded application
/// for `locale`. The country code is whatever follows the first three
/// characters of the locale (e.g. "US" in "en-US").
let downloadReviews (locale : string) =
    let appId = System.Guid "4e08377c-1240-4f80-9c35-0bacde2c66b6"
    let country = locale.Substring 3
    let pageUri = ReviewUrl.getUri country locale appId
    parse (downloadPage pageUri)


/// Program entry point: fetches the review data for every locale and prints
/// the resulting array. Command-line arguments are ignored.
[<EntryPoint>]
let main argv =
    [| "en-US"; "en-GB" |]
    |> Array.map downloadReviews
    |> printfn "%A"
    0
于 2013-06-04T02:16:46.647 回答