我需要用 C# 对数百万行数据执行一次计算,并将结果保存到另一张表中。我已经好几年没有在 C# 中使用过线程了。我使用的是 .NET v4.5 和 EF v5。
原始代码大致如下:
/// <summary>
/// Loads every client, runs the calculation over all of them on the calling
/// thread, and prints the elapsed wall-clock time to the console.
/// </summary>
public static void Main()
{
    Stopwatch sw = new Stopwatch();
    sw.Start();

    // The context is disposable; release it deterministically instead of
    // leaving it to the finalizer. It stays open while DoCalc runs, exactly
    // as in the original flow.
    using (Entities db = new Entities())
    {
        DoCalc(db.Clients.ToList());
    }

    sw.Stop();
    Console.WriteLine(sw.Elapsed);
}
/// <summary>
/// Runs the calculation for every client in <paramref name="clients"/> and
/// saves each result through its own <c>Entities</c> context.
/// </summary>
/// <param name="clients">Clients to process, in list order.</param>
private static void DoCalc(List<Client> clients)
{
    // Dispose the context even when a calculation or a save throws.
    using (Entities db = new Entities())
    {
        foreach (var c in clients)
        {
            var transactions = db.GetTransactions(c);
            var result = calulate(transactions); // the actual calc
            db.Results.Add(result);
            // Saved once per client, matching the original per-item commit.
            db.SaveChanges();
        }
    }
}
这是我对多线程的尝试:
// Number of groups the client list is divided into; SplitUpClients may
// return fewer groups than this when there are not enough clients.
private static int numberOfThreads = 15;

/// <summary>
/// Loads every client, splits them into groups, processes each group on its
/// own task, waits for all tasks to finish, and prints the elapsed time.
/// </summary>
public static void Main()
{
    Stopwatch sw = new Stopwatch();
    sw.Start();

    // The context is disposable; release it deterministically once all
    // tasks have completed.
    using (Entities db = new Entities())
    {
        // Clients is a property (not a method) and SplitUpClients expects a
        // materialized List<Client>.
        var splitUpClients = SplitUpClients(db.Clients.ToList());

        // Size the task array by the number of groups actually produced:
        // the split can yield fewer than numberOfThreads groups, and indexing
        // past the end would throw.
        Task[] allTasks = new Task[splitUpClients.Count];
        for (int i = 0; i < splitUpClients.Count; i++)
        {
            // Copy the group into a per-iteration local. Capturing the loop
            // variable `i` directly would let every lambda observe its final
            // value by the time the task actually runs.
            List<Client> group = splitUpClients[i];
            allTasks[i] = Task.Factory.StartNew(() => DoCalc(group));
        }

        // WaitAll rethrows task failures wrapped in an AggregateException.
        Task.WaitAll(allTasks);
    }

    sw.Stop();
    Console.WriteLine(sw.Elapsed);
}
/// <summary>
/// Processes one group of clients: computes a result for each client and
/// saves it. Every call creates its own <c>Entities</c> context, so no
/// context instance is shared between concurrent calls.
/// </summary>
/// <param name="clients">The group of clients to process, in list order.</param>
private static void DoCalc(List<Client> clients)
{
    // Dispose the context even when a calculation or a save throws, so a
    // failing group does not leak the context.
    using (Entities db = new Entities())
    {
        foreach (var c in clients)
        {
            var transactions = db.GetTransactions(c);
            var result = calulate(transactions);
            db.Results.Add(result);
            // Saved once per client, matching the original per-item commit.
            db.SaveChanges();
        }
    }
}
/// <summary>
/// Splits the list of clients into consecutive subgroups, producing at most
/// <c>numberOfThreads</c> groups.
/// </summary>
/// <param name="clients">Clients to partition; their order is kept.</param>
/// <returns>
/// Groups of up to ceil(count / numberOfThreads) clients each. An empty
/// input yields an empty outer list.
/// </returns>
private static List<List<Client>> SplitUpClients(List<Client> clients)
{
    // Round up so that the remainder lands in the last group rather than
    // spilling into an extra one.
    int maxPerGroup = (int)Math.Ceiling((double)clients.Count / numberOfThreads);

    // Pair each client with its position, then bucket by position / group size.
    // (The original referenced an undefined `ts` here instead of `clients`.)
    return clients.Select((client, index) => new { Client = client, Index = index }).
        GroupBy(o => o.Index / maxPerGroup, o => o.Client).
        Select(group => group.ToList()).
        ToList();
}
我的问题是:
这是安全和正确的方法吗?是否有任何明显的缺点(尤其是关于 EF)?
另外,如何找到最佳线程数?是不是越多越好?