0

我有一个问题,我试图使用 parallel.for() 将一些文件加载​​到数据库中。我的问题是传递给数据库函数的文件 ID 不知何故不正确。也就是说,数据库返回了错误的数据。我试图通过使用并发字典添加 id/name 对来验证这一点,无论是并行还是不并行。在我看来,循环结束后列表应该相同。但他们不是。这以非常简化的方式模拟了我正在做的事情。

这有意义吗?:

class Program
    {
       ConcurrentDictionary<int, string> _cd = new ConcurrentDictionary<int, string>();
        static void Main()
        {
            //simulate the situation
            int[] idList = new int[] {1, 8, 12, 19, 25, 99};
            string[] fileList = new string[] {"file1", "file8", "file12", "file19", "file25", "file99"};

            //run in serial first
            ProcessFiles(idList, fileList); 

            //write out pairs to text file
            foreach (var item in _cd)
            {
                var key = _cd.key;
                var val = _cd.value;
                string line = string.Format("fileId is {0} and fileName is {1}", key, val);

                File.AppendAllText(@"c:\serial.txt", line + Environment.NewLine);
            }
            //results of text file (all good): 
            //fileId is 1 and fileName is file1
            //fileId is 8 and fileName is file8
            //fileId is 12 and fileName is file12
            //fileId is 19 and fileName is file19
            //fileId is 25 and fileName is file25
            //fileId is 99 and fileName is file99

            _cd.Clear();

            //now run in parallel
            ProcessFilesInParallel(idList, fileList); 

            //write out pairs to text file  
            foreach (var item in _cd)
            {
                var key = _cd.key;
                var val = _cd.value;
                string line = string.Format("fileId is {0} and fileName is {1}", key, val);

                File.AppendAllText(@"c:\parallel.txt", line + Environment.NewLine);
            }

            //results of text file (1. some, not all, are mismatched and 2. not all elements got added): 
            //fileId is 8 and fileName is file8
            //fileId is 12 and fileName is file19
            //fileId is 19 and fileName is file12
            //fileId is 25 and fileName is file25
        }

        private void static ProcessFiles(int[]Ids, string[] files)
        {
            int fileId = 0;
            string fileName = string.Empty;

            for(var i=0, i<Ids.Count; i++) 
            {
                fileId = Ids[i];
                fileName = GetControlFileMetaDataFromDB(fileId);

                _cd.TryAdd(fileId, fileName);
            }
        }

        private void static ProcessFilesInParallel(int[]Ids, string[] files)
        {
            int fileId = 0;
            string fileName = string.Empty;

            Parallel.For(0, Ids.Count, i => 
            {
                fileId = Ids[i];

                //this is returning the wrong fileName 
                fileName = GetControlFileMetaDataFromDB(fileId);

                _cd.TryAdd(fileId, fileName);
            }

            );
        }

        private void static GetControlFileMetaDataFromDB(int fileId)
        {
            //removed for brevity:
            //1. connect to oracle
            //2. call function, passing file id
            //3. iterate over data reader and look for the filename 

            while (reader.Read())
            {
                //strip out filename, add it to collection
                int endPos = reader[0].ToString().IndexOf("txt");
                if (endPos != -1)
                {
                    endPos += 3;
                    int startPos = reader[0].ToString().IndexOf(":\\") - 1; 
                    string path = reader[0].ToString().Substring(startPos, endPos - startPos);
                    sring fileName = Path.GetFileName(path);

                    _cd.TryAdd(fileId, fileName);
                    break;
                }
            }
        }
    }
4

2 回答 2

7

您已在 Parallel.For之外fileId声明,这意味着每次迭代都共享同一个变量。fileName

由于迭代可能很好地在不同线程上并行运行,因此您正在重新分配变量,而另一个同时迭代可能正在使用它们。

你需要做的是将你的变量声明移到循环,这样它们每次迭代都是本地的;

Parallel.For(0, Ids.Count, i => 
{
    int fileId = Ids[i];

    //this is returning the wrong fileName 
    string fileName = GetControlFileMetaDataFromDB(fileId);

    _cd.TryAdd(fileId, fileName);
}
于 2016-02-12T15:19:31.593 回答
1

这里的问题在于ProcessFilesInParallel(int[]Ids, string[] files)功能。循环中的迭代for将并行执行,并且您声明fileId并且fileNamefor' 范围之外,因此该变量在竞争条件下的所有迭代中共享。

你可以解决这个问题,在里面移动fileIdfileName变量for

private static void ProcessFilesInParallel(int[] Ids, string[] files)
{
    Parallel.For(0, Ids.Length, i =>
    {
        var fileId = Ids[i];

        //this is returning the wrong fileName 
        var fileName = GetControlFileMetaDataFromDB(fileId);

        _cd.TryAdd(fileId, fileName);
    });
}

此外,在问题parallel.for 混淆(收藏丢失顺序)的标题中,您说收藏丢失顺序。正如您在此处所读到的,没有为并行循环定义执行顺序。

于 2016-02-12T15:48:13.287 回答