0

我刚为一位经营呼叫中心的朋友编写了下面的算法。他想重新格式化文件名,并根据月份和日期把文件移动到相应的目录中。但是呼叫中心有超过 350 万个文件,程序运行了 12 个小时,却只处理了不到 20 GB 的数据。

那么,有没有办法优化下面的算法?

/// <summary>
/// Walks a directory tree of call recordings, looks each recording up in the
/// CallCenter database by the numeric ID encoded in its file name, and moves it
/// to "&lt;target root&gt;\&lt;year&gt;\&lt;month&gt;\&lt;time&gt; - &lt;tel&gt; - &lt;agent&gt;.mp3",
/// flagging the database row as used afterwards.
/// </summary>
class Program
{
    // How much deep to scan. (of course you can also pass it to the method)
    const int HowDeepToScan = 20;

    // Root of the destination tree; recordings end up in <root>\<year>\<month>\.
    const string TargetRoot = @"E:\Ses Dosyalari";

    // Connection string of the database that holds ResultTable.
    const string ConnectionString = @"Integrated Security=SSPI;Persist Security Info=False;Initial Catalog=CallCenter;Data Source=.";

    static void Main(string[] args)
    {
        ProcessDir(@"E:\Hard Disk 2\", 1);
        Console.WriteLine("Islem Bitmistir");
        Console.ReadLine();
    }

    /// <summary>
    /// Processes the files in <paramref name="sourceDir"/> and then recurses
    /// into its subdirectories, skipping reparse points.
    /// </summary>
    /// <param name="sourceDir">Directory to process.</param>
    /// <param name="recursionLvl">
    /// Depth of <paramref name="sourceDir"/>, where the root is 1. Directories
    /// deeper than <see cref="HowDeepToScan"/> are ignored.
    /// </param>
    public static void ProcessDir(string sourceDir, int recursionLvl)
    {
        if (recursionLvl > HowDeepToScan)
        {
            return;
        }

        ChangeDirectories(sourceDir);

        // Recurse into subdirectories of this directory.
        foreach (string subdir in Directory.GetDirectories(sourceDir))
        {
            // Do not iterate through reparse points
            bool isReparsePoint =
                (File.GetAttributes(subdir) & FileAttributes.ReparsePoint) == FileAttributes.ReparsePoint;
            if (!isReparsePoint)
            {
                ProcessDir(subdir + @"\", recursionLvl + 1);
            }
        }
    }

    /// <summary>
    /// Moves every ".mp3" file directly inside <paramref name="givenPath"/> whose
    /// name is a numeric ID found in ResultTable to its destination path and
    /// sets Used = 1 on the matching row.
    /// </summary>
    /// <param name="givenPath">Directory that contains the recordings.</param>
    /// <remarks>
    /// Files that are not ".mp3", whose name is not purely numeric, that have no
    /// matching row, or whose destination already exists are left untouched.
    /// </remarks>
    public static void ChangeDirectories(string givenPath)
    {
        // Snapshot the listing up front: files are moved out of the directory
        // while we iterate.
        string[] fileEntries = Directory.GetFiles(givenPath);
        if (fileEntries.Length == 0)
        {
            // Nothing to do, so do not pay for a database connection.
            return;
        }

        // One connection and two parameterized commands are reused for the whole
        // directory; "using" guarantees they are released even when a move fails.
        using (SqlConnection callCenterConnection = new SqlConnection(ConnectionString))
        using (SqlCommand selectCmd = new SqlCommand(
            "SELECT TOP 1 ID,Time,Tel,AgentID FROM ResultTable WHERE ID=@id", callCenterConnection))
        using (SqlCommand updateCmd = new SqlCommand(
            "UPDATE ResultTable SET Used = 1 WHERE ID=@id", callCenterConnection))
        {
            // NOTE(review): assumes ResultTable.ID is an integer column, as the
            // unquoted literal in the original query suggests - confirm.
            selectCmd.Parameters.Add("@id", SqlDbType.BigInt);
            updateCmd.Parameters.Add("@id", SqlDbType.BigInt);
            callCenterConnection.Open();

            foreach (string fullFileName in fileEntries)
            {
                // The destination name always ends in ".mp3", so only recordings
                // with that extension are eligible.
                if (!string.Equals(Path.GetExtension(fullFileName), ".mp3", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // Digits only: rejects stray files and anything that is not a
                // plain ID instead of sending it to the database.
                long id;
                if (!long.TryParse(
                        Path.GetFileNameWithoutExtension(fullFileName),
                        System.Globalization.NumberStyles.None,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out id))
                {
                    continue;
                }

                string targetPath = FindTargetPath(selectCmd, id);
                if (targetPath == null || File.Exists(targetPath))
                {
                    continue;
                }

                // File.Move does not create missing directories; this call is a
                // no-op when the year\month folder already exists.
                Directory.CreateDirectory(Path.GetDirectoryName(targetPath));

                // Move the file that was actually found on disk rather than a
                // path rebuilt from the database ID.
                File.Move(fullFileName, targetPath);

                updateCmd.Parameters["@id"].Value = id;
                updateCmd.ExecuteNonQuery();
            }
        }
    }

    // Returns the destination path for the recording with the given ID, or null
    // when ResultTable has no row for it or the row has no Time value.
    private static string FindTargetPath(SqlCommand selectCmd, long id)
    {
        selectCmd.Parameters["@id"].Value = id;

        // The reader is closed before the caller issues the UPDATE on the same
        // connection.
        using (SqlDataReader reader = selectCmd.ExecuteReader())
        {
            if (!reader.Read() || reader.IsDBNull(1))
            {
                return null;
            }

            object time = reader.GetValue(1);
            DateTime fileDate = Convert.ToDateTime(time);

            // The time is formatted with the current culture, exactly as before,
            // so names stay comparable with files moved by earlier runs.
            string fileName = Convert.ToString(time).Replace(":", ".")
                + " - " + Convert.ToString(reader.GetValue(2))
                + " - " + Convert.ToString(reader.GetValue(3))
                + ".mp3";

            return Path.Combine(TargetRoot, fileDate.Year.ToString(), fileDate.Month.ToString(), fileName);
        }
    }
}
4

1 回答 1

0

很有可能瓶颈不是代码的性能特征,而是位置之间的网络。

也就是说,特别是第二种方法有很大的优化空间。

首先,始终批量处理数据库中的数据。逐条检查记录是否存在的开销很高;应当一次性把它们全部读出来并放入一个列表中,然后遍历该列表来执行移动和状态更新。

其次,您是用 `+` 来拼接字符串的;请改用 `String.Format` 或 `String.Concat`(或者 `StringBuilder`,如果这更符合您的喜好)。

由于此操作中最紧张的资源是网络,我建议使用压缩库把(至少)一个目录下的所有文件打包,发送到远程站点,再由那里的一个进程解压,并发回完成通知。传输得越少,效果越好。

于 2012-08-31T07:38:19.477 回答