谁能救我?我有以下代码:
private List<string> GenerateTerms(string[] docs)
{
List <string> uniques = new List<string>();
for (int i = 0; i < docs.Length; i++)
{
string[] tokens = docs[i].Split(' ');
List<string> toktolist = new List<string>(tokens.ToList());
var query = toktolist.GroupBy(word => word)
.OrderByDescending(g => g.Count())
.Select(g => g.Key)
.Take(20000);
foreach (string k in query)
{
if (!uniques.Contains(k))
uniques.Add(k);
}
}
return uniques;
}
它是关于根据最高频率从多个文档中生成术语。我用字典做了同样的过程。在这两种情况下都花费了 440 毫秒。但令人惊讶的是,当我使用带有数组列表的过程时,如下代码所示
private ArrayList GenerateTerms(string[] docs)
{
Dictionary<string, int> yy = new Dictionary<string, int>();
ArrayList uniques = new ArrayList();
for (int i = 0; i < docs.Length; i++)
{
string[] tokens = docs[i].Split(' ');
yy.Clear();
for (int j = 0; j < tokens.Length; j++)
{
if (!yy.ContainsKey(tokens[j].ToString()))
yy.Add(tokens[j].ToString(), 1);
else
yy[tokens[j].ToString()]++;
}
var sortedDict = (from entry in yy
orderby entry.Value descending
select entry).Take(20000).ToDictionary
(pair => pair.Key, pair => pair.Value);
foreach (string k in sortedDict.Keys)
{
if (!uniques.Contains(k))
uniques.Add(k);
}
}
return uniques;
}
它花了 350 毫秒。数组列表不应该比列表和字典慢吗?请用这种时态拯救我。