1

我正在尝试将从 http://www.clear-lines.com/blog/post/Nearest-Neighbor-Classification-Part-2.aspx 复制的 F# 脚本作为已编译的 .fs 程序(而不是脚本)来运行。我已经下载了所用到的所有库,并在其他环境中测试过,它们都可以正常工作。程序能够正确编译,并将 csv 文件整理到一个数组中,但在执行到以下代码之后就不再继续执行:

let labels = fileAsLines |> Array.map (fun line -> line.[4])
dataset, labels

预先感谢您的任何帮助。我经常阅读和使用这个论坛,也感谢大家给予的所有指导。

// Learn more about F# at http://fsharp.net
// Code from http://www.clear-lines.com/blog/post/Nearest-Neighbor-Classification-part-2.aspx

open MicrosoftResearch.Infer.Fun.FSharp.Syntax
open MicrosoftResearch.Infer.Fun.FSharp.Inference
open MicrosoftResearch.Infer.Fun.Lib
open MicrosoftResearch.Infer.Maths
open System.IO
open System
open System.Drawing
open MSDN.FSharp.Charting

/// Euclidean distance between two vectors of the same length.
/// Raises an ArgumentException when the two arrays differ in length.
let distance v1 v2 =
    // Accumulate the squared component-wise differences, then take the root.
    let squaredSum =
        Array.fold2 (fun acc a b -> acc + pown (a - b) 2) 0.0 v1 v2
    sqrt squaredSum

/// k-nearest-neighbour vote for `subject`.
/// Computes the distance from every row of `dataset` to `subject`, keeps the
/// `k` closest rows and groups them by label. Returns the largest group as a
/// pair: the winning label and the (label, distance) neighbours carrying it.
/// `labels` must be parallel to `dataset`; Seq.take raises when fewer than
/// `k` rows are available.
let classify subject dataset labels k =
    let distances =
        dataset |> Array.map (fun row -> distance row subject)
    let nearest =
        Array.zip labels distances
        |> Array.sortBy (fun (_, dist) -> dist)
        |> Seq.ofArray
        |> Seq.take k
    nearest
    |> Seq.groupBy (fun (label, _) -> label)
    |> Seq.maxBy (fun (_, votes) -> Seq.length votes)
/// Extracts the value at index `i` from every row of `dataset`.
let column (dataset: float [][]) i =
    Array.map (fun (row: float []) -> row.[i]) dataset

/// Splits `dataset` into its columns (one array per feature).
/// The number of columns is taken from the first row, so the dataset must
/// contain at least one row.
let columns (dataset: float [][]) =
    let width = Array.length dataset.[0]
    Array.init width (fun i -> column dataset i)

/// Computes the (minimum, maximum) pair of every column of `dataset`.
let minMax dataset =
    columns dataset
    |> Array.map (fun values -> (Array.min values, Array.max values))

/// Builds a min-max scaler from `dataset`.
/// The column bounds are computed once; the returned function rescales a
/// vector component-wise with (value - min) / (max - min).
/// A column whose minimum equals its maximum carries no information and would
/// otherwise divide by zero (yielding NaN or infinity that then corrupts every
/// distance), so such a component is mapped to 0.0.
let minMaxNormalizer dataset =
    let bounds = minMax dataset
    fun (vector: float[]) ->
        bounds
        |> Array.mapi (fun i (lo, hi) ->
            // Read the component first so a too-short vector still fails loudly.
            let value = vector.[i]
            let range = hi - lo
            if range = 0.0 then 0.0 else (value - lo) / range)

/// Applies `normalizer` to every row of `data` and returns the rescaled rows.
let normalize data (normalizer: float[] -> float[]) =
    Array.map normalizer data

/// Trains a k-nearest-neighbour classifier on `dataset` / `labels`.
/// The dataset is min-max normalized once up front; the returned function
/// normalizes each subject with the same scaler before classifying it
/// against the normalized training rows.
let classifier dataset labels k =
    let scale = minMaxNormalizer dataset
    let trainingRows = normalize dataset scale
    fun subject ->
        let scaledSubject = scale subject
        classify scaledSubject trainingRows labels k

/// Election data loaded from a comma-separated text file, as a pair
/// (feature rows, labels). Fields 1-3 of every line are parsed as floats and
/// field 4 is used as the label; field 0 is not used.
/// This is a value, not a function: the file is read when the binding is
/// initialized.
let elections =
    let file = @"C:\Users\Jessica\Dataset\Election2008.txt"
    // The file is comma-separated, so its numbers must use '.' as the decimal
    // separator; parse with the invariant culture instead of the machine's
    // regional settings, which may expect ',' and fail or misread values.
    let culture = System.Globalization.CultureInfo.InvariantCulture
    let fileAsLines =
        File.ReadAllLines(file)
        // Blank lines have no fields and would fail on the index lookups below.
        |> Array.filter (fun line -> not (String.IsNullOrWhiteSpace(line)))
        |> Array.map (fun line -> line.Split(','))
    let dataset =
        fileAsLines
        |> Array.map (fun fields ->
            [| Convert.ToDouble(fields.[1], culture)
               Convert.ToDouble(fields.[2], culture)
               Convert.ToDouble(fields.[3], culture) |])
    let labels = fileAsLines |> Array.map (fun fields -> fields.[4])
    dataset, labels

/// Hold-out evaluation of the k-nearest-neighbour classifier.
/// The first `prop` fraction of the rows (rounded down) is used as the test
/// set and the remaining rows as training data. Prints one
/// "predicted actual" line per test subject, then a summary of how many
/// predictions matched their label.
let evaluate dataset (labels: string []) k prop =
    let size = Array.length dataset
    let sample = int (floor (float size * prop))
    let lastTest = sample - 1
    // Leading rows are held out for testing ...
    let testSubjects = dataset.[0 .. lastTest]
    let testLabels = labels.[0 .. lastTest]
    // ... and everything after them trains the classifier.
    let trainSubjects = dataset.[sample .. size - 1]
    let trainLabels = labels.[sample .. size - 1]
    let predict = classifier trainSubjects trainLabels k
    let results =
        testSubjects
        |> Array.mapi (fun i subject ->
            let predicted, _ = predict subject
            predicted, testLabels.[i])
    for (predicted, actual) in results do
        printfn "%s %s" predicted actual
    let correct =
        results
        |> Array.sumBy (fun (predicted, actual) -> if predicted = actual then 1 else 0)
    printfn "%i out of %i called correctly" correct sample
4

1 回答 1

2

`let elections` 块内的代码之所以会被执行,是因为它被定义为一个值而不是一个函数(它不接受任何参数,连 unit `()` 也不接受)。这意味着它在声明时就会执行。

脚本中在此之后出现的唯一代码只是声明了一个函数(名为 `evaluate`;它看起来很相似,但它接受参数,因此除非有人调用它并提供所需的参数,否则不会执行),而您没有任何代码调用它。

我相信让它做我认为你想做的最简单的改变是:

  1. 去掉 `evaluate` 函数末尾的 `k` 和 `prop` 参数(这些好像没用)
  2. 在脚本的最后,使用存储在 `elections` 中的值调用 `evaluate` 函数,如下所示:

    let dataset, labels = elections

    evaluate dataset labels

稍微重构一下代码可能是有意义的,因为在 `elections` 的声明期间执行代码似乎有点令人困惑;不过一旦代码能够工作,重构和理解正在发生的事情可能会更容易。

于 2013-11-02T12:01:40.073 回答