3

我有一个函数,用于读取 CSV 文件并返回(通过 LINQ)处理后的结果。每个文件我都需要打开两次,因为我要针对不同的用途对数据做截然不同的切片;而且我使用的是 CodeProject 上的“A Fast CSV Reader”,用它把文件读取两次、每次直接用 LINQ 处理,比先把文件读入 DataTable 更快。

单独来看,每次函数调用(imppow 或 impfuel)只需要 2 秒多一点。

用一个简单的 for 循环完成六次调用(耗时 13 秒):

// Sequential baseline: for each of the three input files, call imppow and
// then impfuel with the matching pathgran entry, printing the first element
// of each result array.
string[] pathstring = { @"C:\Temp\Hourly1.txt", @"C:\Temp\Hourly2.txt", @"C:\Temp\Hourly3.txt" };
string[] pathgran = { "M", "Q", "Y" };
int fileIndex = 0;
while (fileIndex < 3)
{
    // Pick the file path and its paired pathgran value for this iteration.
    string path = pathstring[fileIndex];
    string gran = pathgran[fileIndex];

    object[] respow = imppow(path, gran);
    Console.WriteLine(respow[0]);

    object[] resfuel = impfuel(path, gran);
    Console.WriteLine(resfuel[0]);

    fileIndex++;
}

像这样并行化可以缩短 3 秒,但也仅此而已:

// Parallel variant: Parallel.For invokes the body once per index 0..2, and
// each invocation calls imppow and then impfuel for that index's file.
Parallel.For(0, 3, fileIndex =>
{
    string path = pathstring[fileIndex];
    string gran = pathgran[fileIndex];

    object[] respow = imppow(path, gran);
    Console.WriteLine(respow[0]);

    object[] resfuel = impfuel(path, gran);
    Console.WriteLine(resfuel[0]);
});

如前所述,单次调用大约需要 2 秒。我能否通过多线程或其他方式进一步缩短运行时间?谢谢。

下面是这两个函数:

/// <summary>
/// Reads the CSV file at <paramref name="filepath"/>, keeps the records whose
/// field 11 equals "F", groups them by fields 10, 9, 7 and 4, and returns one
/// element per group holding the group key values, the average of field 22
/// and the sum of field 24.
/// </summary>
/// <param name="filepath">Path of the CSV file to open.</param>
/// <param name="gran">Not used by this method; kept so existing callers still compile.</param>
/// <returns>
/// An array of anonymous objects with the properties Rpg, Fue, Tari, Mon,
/// AverageA and SumA, in that order.
/// </returns>
static object[] impfuel(string filepath, string gran)
{
    // NOTE(review): the second constructor argument is presumably "has headers" — confirm against the CsvReader API.
    using (CsvReader csv =
           new CsvReader(new StreamReader(filepath), true))
    {
        csv.SupportsMultiline = false;

        // Project each record first so every row is copied into its own object
        // before filtering/grouping. Only the fields actually used are kept.
        var results = csv.Select(r => new { mt = r[4], tar = r[7], fuel = r[9], rg = r[10], rt = r[11], fp = r[22], fi = r[24] })
                         .Where(a => a.rt == "F")
                         .GroupBy(a => new { a.rg, a.fuel, a.tar, a.mt })
                         // The grouping key already carries rg/fuel/tar/mt, so read them
                         // from g.Key instead of re-enumerating the group for each one.
                         // NOTE(review): double.Parse uses the current culture; if the files
                         // always use '.' as decimal separator, pass CultureInfo.InvariantCulture.
                         .Select(g => new
                         {
                             Rpg = g.Key.rg,
                             Fue = g.Key.fuel,
                             Tari = g.Key.tar,
                             Mon = g.Key.mt,
                             AverageA = g.Average(a => double.Parse(a.fp)),
                             SumA = g.Sum(a => double.Parse(a.fi))
                         })
                         .ToArray();
        return results;
    }
}

/// <summary>
/// Reads the CSV file at <paramref name="filepath"/>, keeps the records whose
/// field 11 equals "M", groups them by fields 10, 7 and 4, and returns one
/// element per group holding the group key values, the average of field 17
/// and the sum of field 19.
/// </summary>
/// <param name="filepath">Path of the CSV file to open.</param>
/// <param name="gran">Not used by this method; kept so existing callers still compile.</param>
/// <returns>
/// An array of anonymous objects with the properties Rpg, Tari, Mon,
/// AverageA and SumA, in that order.
/// </returns>
static object[] imppow(string filepath, string gran)
{
    // NOTE(review): the second constructor argument is presumably "has headers" — confirm against the CsvReader API.
    using (CsvReader csv =
           new CsvReader(new StreamReader(filepath), true))
    {
        csv.SupportsMultiline = false;

        // Project each record first so every row is copied into its own object
        // before filtering/grouping. Only the fields actually used are kept.
        var results = csv.Select(r => new { mt = r[4], tar = r[7], rg = r[10], rt = r[11], pp = r[17], pi = r[19] })
                         .Where(a => a.rt == "M")
                         .GroupBy(a => new { a.rg, a.tar, a.mt })
                         // The grouping key already carries rg/tar/mt, so read them from
                         // g.Key instead of re-enumerating the group for each one.
                         // NOTE(review): double.Parse uses the current culture; if the files
                         // always use '.' as decimal separator, pass CultureInfo.InvariantCulture.
                         .Select(g => new
                         {
                             Rpg = g.Key.rg,
                             Tari = g.Key.tar,
                             Mon = g.Key.mt,
                             AverageA = g.Average(a => double.Parse(a.pp)),
                             SumA = g.Sum(a => double.Parse(a.pi))
                         })
                         .ToArray();
        return results;
    }
}

4

1 回答 1

2

您没有说明文件的大小:是几千字节,还是以兆字节计?只读取一次文件可以减少缓慢的 IO 操作。

我会只读取一次文件,并在读取时把内容分别放入两个不同的列表中。

// Single-read driver: each file is read once by ReadFile, which fills the two
// lists; imppow and impfuel then work on those in-memory lists.
string[] pathstring = { @"C:\Temp\Hourly1.txt", @"C:\Temp\Hourly2.txt", @"C:\Temp\Hourly3.txt" };
foreach (string path in pathstring)
{
    // Fresh lists per file so rows from different files are never mixed.
    var powList = new List<Content>();
    var fuelList = new List<Content>();
    ReadFile(path, ref powList, ref fuelList);

    var respow = imppow(powList);
    Console.WriteLine(respow[0]);

    var resfuel = impfuel(fuelList);
    Console.WriteLine(resfuel[0]);
}

/// <summary>
/// Reads the CSV file at <paramref name="filepath"/> once and distributes its
/// records by the value of field 11 (<c>rt</c>): "F" records are appended to
/// <paramref name="fuelList"/>, "M" records to <paramref name="powList"/>.
/// Records with any other <c>rt</c> value are skipped.
/// </summary>
/// <param name="filepath">Path of the CSV file to open.</param>
/// <param name="powList">List that receives the records whose <c>rt</c> is "M".</param>
/// <param name="fuelList">List that receives the records whose <c>rt</c> is "F".</param>
void ReadFile(string filepath, ref List<Content> powList, ref List<Content> fuelList)
{
    // NOTE(review): the second constructor argument is presumably "has headers" — confirm against the CsvReader API.
    using (CsvReader csv = new CsvReader(new StreamReader(filepath), true))
    {
        csv.SupportsMultiline = false;
        foreach (Content content in csv.Select(r => new Content() { yr = r[1], qr = r[3], mt = r[4], tar = r[7], mac = r[8], fuel = r[9], rg = r[10], rt = r[11], pp = r[17], pi = r[19], fp = r[22], fi = r[24] }))
        {
            if (content.rt == "F")
            {
                fuelList.Add(content);
            }
            // Comparison, not assignment: the original `content.rt = "M"` does not compile.
            else if (content.rt == "M")
            {
                powList.Add(content);
            }
        }
    }
}

/// <summary>
/// Groups the already-filtered records in <paramref name="fuelList"/> by
/// <c>rg</c>, <c>fuel</c>, <c>tar</c> and <c>mt</c>, and returns one element
/// per group holding the key values, the average of <c>fp</c> and the sum of
/// <c>fi</c>.
/// </summary>
/// <param name="fuelList">Records to aggregate; no filtering is applied here.</param>
/// <returns>
/// An array of anonymous objects with the properties Rpg, Fue, Tari, Mon,
/// AverageA and SumA, in that order.
/// </returns>
static object[] impfuel(List<Content> fuelList)
{
    var results = fuelList.GroupBy(a => new { a.rg, a.fuel, a.tar, a.mt })
                     // The grouping key already carries rg/fuel/tar/mt, so read them
                     // from g.Key instead of re-enumerating the group for each one.
                     // NOTE(review): double.Parse uses the current culture; if the files
                     // always use '.' as decimal separator, pass CultureInfo.InvariantCulture.
                     .Select(g => new
                     {
                         Rpg = g.Key.rg,
                         Fue = g.Key.fuel,
                         Tari = g.Key.tar,
                         Mon = g.Key.mt,
                         AverageA = g.Average(a => double.Parse(a.fp)),
                         SumA = g.Sum(a => double.Parse(a.fi))
                     })
                     .ToArray();
    return results;
}

}

imppow 函数和 Content 类您可以自行编写。

于 2013-07-18T20:10:22.557 回答