
The following code takes more than 7 minutes to complete, even though I use Parallel.ForEach. The list I iterate over, "final_products", contains about 7000 products.

    public void GenerateTreeFromAllFinalProducts()
    {
        XmlSerializer serializer = new XmlSerializer(typeof(ImageFeature<float>[]));
        DSTableAdapters.Products_UniqueTableAdapter pft = new DSTableAdapters.Products_UniqueTableAdapter();
        DSTableAdapters.Products_Unique_SURFTableAdapter pus = new DSTableAdapters.Products_Unique_SURFTableAdapter();
        DS.Products_UniqueDataTable final_products = pft.GetData();

        Stopwatch stopwatch = new Stopwatch();
        stopwatch.Start();

        Parallel.ForEach(final_products.AsParallel(), row =>
        {                 
            //Get SURF data for all images found similar to this image
            Types.Products_Unique_SURFRow surfData = GetDataByUniqueProductID(row.id);

            ImageFeature<float>[] row_features = (ImageFeature<float>[])serializer.Deserialize(new StringReader(Decompress(surfData.SURF)));
            if (row_features != null)
                flann.AddSurfDescriptors(row_features, row.id);                       
        });

        stopwatch.Stop();
        Console.WriteLine("Time elapsed: {0}", stopwatch.Elapsed);
    }

Is it normal that it takes this long, and if not, how can I optimize this code?

GetDataByUniqueProductID(row.id) is a call to my database that returns one row:

    private static Types.Products_Unique_SURFRow GetDataByUniqueProductID(int rowid)
    {
        Types.Products_Unique_SURFRow ret = new Types.Products_Unique_SURFRow();

        string sqlText = "SET ROWCOUNT 1 SELECT SURF from Products_Unique_SURF WHERE unique_product_id =" + rowid;

        using (SqlConnection myConn = new SqlConnection(global::SCBot.Properties.Settings.Default.DataConnectionString))
        {
            myConn.Open();

            SqlCommand cmd = new SqlCommand(sqlText, myConn);
            try
            {
                cmd.CommandType = CommandType.Text;

                SqlDataReader reader = cmd.ExecuteReader();
                while (reader.Read())
                {
                    Types.Products_Unique_SURFRow row = new Types.Products_Unique_SURFRow();
                    row.SURF = Convert.ToString(reader["SURF"]);

                    ret = row;
                }
            }
            catch (Exception e)
            {
                MessageBox.Show(e.ToString());
            }
        }
        return ret;
    }

My initial code was the following:

    public void GenerateTreeFromAllFinalProducts()
    {
        XmlSerializer serializer = new XmlSerializer(typeof(ImageFeature<float>[]));
        DSTableAdapters.Products_UniqueTableAdapter pft = new DSTableAdapters.Products_UniqueTableAdapter();
        DSTableAdapters.Products_Unique_SURFTableAdapter pus = new DSTableAdapters.Products_Unique_SURFTableAdapter();
        DS.Products_UniqueDataTable final_products = pft.GetData();

        foreach (DS.Products_UniqueRow row in final_products)
        {
            //Get SURF data for all images found similar to this image
            List<DS.Products_Unique_SURFRow> surfData = pus.GetDataByUniqueProductID(row.id).ToList();

            foreach (DS.Products_Unique_SURFRow data in surfData)
            {
                ImageFeature<float>[] row_features = (ImageFeature<float>[])serializer.Deserialize(new StringReader(Decompress(data.SURF)));
                flann.AddSurfDescriptors(row_features, row.id);
            }
        }
    }

But that was too slow, which is why I tried a Parallel.ForEach.


1 Answer


You should measure how long the inner code takes; parallelizing does not speed up the inner code itself. Instead of the nested Decompress/Deserialize call, you can time each step separately.
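For example, a minimal sketch of the loop body with each step timed on its own (it only uses identifiers that already appear in the question):

    // Time each stage separately: DB round trip, decompression, deserialization.
    Stopwatch swDb = Stopwatch.StartNew();
    Types.Products_Unique_SURFRow surfData = GetDataByUniqueProductID(row.id);
    swDb.Stop();

    Stopwatch swDecompress = Stopwatch.StartNew();
    string xml = Decompress(surfData.SURF);
    swDecompress.Stop();

    Stopwatch swDeserialize = Stopwatch.StartNew();
    ImageFeature<float>[] row_features = (ImageFeature<float>[])serializer.Deserialize(new StringReader(xml));
    swDeserialize.Stop();

    Console.WriteLine("db: {0}  decompress: {1}  deserialize: {2}", swDb.Elapsed, swDecompress.Elapsed, swDeserialize.Elapsed);

That tells you which of the three stages actually dominates the 7 minutes.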

After your edit:

Every thread creates a new connection. Otherwise I think this is a nice example of how to speed things up with parallelism, but changing the algorithm will help more: querying single rows over separate connections/threads takes more time than querying them all (or a selection of them) at once and looping over the results in a single-threaded foreach.

My take:

I would collect all the ids from the final_products enumeration and build a single string with a StringBuilder:

StringBuilder sb = new StringBuilder();
bool isFirst = true;

sb.Append("(");
foreach(var prod in final_products)
{
    if(isFirst)
        isFirst = false;
    else
        sb.Append(", ");

    sb.Append(prod.id);
}
sb.Append(")");

string query = "SELECT unique_product_id, SURF FROM Products_Unique_SURF WHERE unique_product_id IN " + sb.ToString();

// execute the query

// foreach row, decompress, deserialize etc...
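For completeness, a minimal sketch of those two commented steps, reusing the connection string, serializer, Decompress and flann.AddSurfDescriptors from the question. The unique_product_id column is selected as well so each feature set can be mapped back to its product; adjust the column names to your actual schema:

    // One query, one connection, one round trip instead of ~7000.
    using (SqlConnection conn = new SqlConnection(global::SCBot.Properties.Settings.Default.DataConnectionString))
    using (SqlCommand cmd = new SqlCommand(query, conn))
    {
        conn.Open();
        using (SqlDataReader reader = cmd.ExecuteReader())
        {
            while (reader.Read())
            {
                // Decompress/deserialize each row exactly as in the original loop.
                int id = Convert.ToInt32(reader["unique_product_id"]);
                ImageFeature<float>[] features = (ImageFeature<float>[])serializer.Deserialize(
                    new StringReader(Decompress(Convert.ToString(reader["SURF"]))));
                if (features != null)
                    flann.AddSurfDescriptors(features, id);
            }
        }
    }

This keeps everything single-threaded on one connection, which is the point: the per-row round trips, not the CPU work, are what the batched query removes.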