2

有没有更优雅/更快的方法来编写下面的代码?目前大约需要 45 秒。

query.sql 共有 200,000 行,每一行的 SQL 格式都完全相同:

SELECT N'+dave' AS [AccountName], N'20005' AS [EmployeeID], N'-6' AS [PlatformID] UNION ALL

我发现,每 1000 行分块写入一次,比等到最后再用 WriteAllText 一次性写入(运行大约需要 20 分钟)要快得多。

/// <summary>
/// Rewrites every line of query.sql as an INSERT statement and appends the output to
/// query2.sql. The statements are accumulated in a plain string that is appended to the
/// output file each time the line index is a multiple of 1000, and once more at the end.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const string inputPath = @"e:\temp\query.sql";
    const string outputPath = @"e:\temp\query2.sql";

    var timer = new Stopwatch();
    timer.Start();

    string[] lines = File.ReadAllLines(inputPath);
    string pending = "";

    int i;
    for (i = 0; i < lines.Length; i++)
    {
        // The three values sit between single quotes, so after splitting on '
        // they are found at the odd indexes 1, 3 and 5.
        string[] parts = lines[i].Split('\'');
        string accountName = parts[1];
        string employeeId = parts[3];
        string platformId = parts[5];

        string statement = "INSERT [PreStaging].[Import_AccountEmployeeMapping] ([AccountName], [EmployeeID], [PlatformID]) VALUES (N" +
                    "'" + accountName + "', "
                    + employeeId + ", "
                    + platformId + ")";

        pending += statement + Environment.NewLine;

        if (i % 1000 != 0)
        {
            continue;
        }

        // Report progress, append the accumulated text to the file and start over.
        Console.WriteLine(i + " " + DateTime.Now.ToLongTimeString());
        File.AppendAllText(outputPath, pending);
        pending = "";
    }

    // Append whatever was collected after the last in-loop flush.
    File.AppendAllText(outputPath, pending);

    timer.Stop();
    TimeSpan elapsed = timer.Elapsed;
    Console.WriteLine("Timespan: {0}m", elapsed.TotalMinutes);
    Console.WriteLine("Total records: " + i);

    // Wait for a line of input before the program exits.
    Console.ReadLine();
}

编辑:StringBuilder 解决方案(1000 毫秒):

/// <summary>
/// Same conversion of query.sql into INSERT statements, but the pending output is
/// collected in a StringBuilder. The buffer is appended to query2.sql each time the
/// line counter is a multiple of 1000, and once more after the loop.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main2(string[] args)
{
    const string inputPath = @"e:\temp\query.sql";
    const string outputPath = @"e:\temp\query2.sql";

    var timer = new Stopwatch();
    timer.Start();

    var buffer = new StringBuilder();
    string[] lines = File.ReadAllLines(inputPath);

    int i = 0;
    foreach (string line in lines)
    {
        // Quoted values end up at indexes 1, 3 and 5 after splitting on the single quote.
        string[] parts = line.Split('\'');
        string accountName = parts[1];
        string employeeId = parts[3];
        string platformId = parts[5];

        string statement = "INSERT [PreStaging].[Import_AccountEmployeeMapping] ([AccountName], [EmployeeID], [PlatformID]) VALUES (N" +
                    "'" + accountName + "', "
                    + employeeId + ", "
                    + platformId + ")";

        buffer.Append(statement).Append(Environment.NewLine);

        // Buffering: write the collected statements out on every 1000th counter value
        // and continue with a fresh builder.
        bool flushNow = i % 1000 == 0;
        if (flushNow)
        {
            Console.WriteLine(i + " " + DateTime.Now.ToLongTimeString());
            File.AppendAllText(outputPath, buffer.ToString());
            buffer = new StringBuilder();
        }

        i++;
    }

    // Append the statements collected since the last in-loop flush.
    File.AppendAllText(outputPath, buffer.ToString());

    timer.Stop();
    TimeSpan elapsed = timer.Elapsed;
    Console.WriteLine("Timespan: {0}ms", elapsed.TotalMilliseconds);
    Console.WriteLine("Total records: " + i);

    // Wait for a line of input before the program exits.
    Console.ReadLine();
}

编辑:StreamWriter 解决方案(450 毫秒)

/// <summary>
/// Converts each line of query.sql into an INSERT statement and writes it straight to
/// query2.sql through a single StreamWriter opened in append mode.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const string statementFormat = "INSERT [PreStaging].[Import_AccountEmployeeMapping] ([AccountName], [EmployeeID], [PlatformID]) VALUES (N'{0}', {1}, {2})";

    var timer = new Stopwatch();
    timer.Start();

    string[] lines = File.ReadAllLines(@"e:\temp\query.sql");
    int i = 0;
    using (StreamWriter writer = File.AppendText(@"e:\temp\query2.sql"))
    {
        while (i < lines.Length)
        {
            // Quoted values end up at indexes 1, 3 and 5 after splitting on the single quote.
            string[] parts = lines[i].Split('\'');
            string accountName = parts[1];
            string employeeId = parts[3];
            string platformId = parts[5];

            writer.WriteLine(statementFormat, accountName, employeeId, platformId);

            i++;
        }
    }

    timer.Stop();
    TimeSpan elapsed = timer.Elapsed;
    Console.WriteLine("Timespan: {0}ms", elapsed.TotalMilliseconds);
    Console.WriteLine("Total records: " + i);

    // Wait for a line of input before the program exits.
    Console.ReadLine();
}
4

5 回答 5

4

正如其他人指出的那样,使用StringBuilder. 所以在你的情况下,声明:

// Buffer that collects the generated statements between writes to the file.
var textToWrite = new StringBuilder();

然后:

// Queue the statement followed by a line break.
textToWrite.AppendLine(message);

// On every 1000th index: report progress, append the buffered text to the
// output file and continue with a fresh builder.
bool flushNow = i % 1000 == 0;
if (flushNow)
{
    string timeStamp = DateTime.Now.ToLongTimeString();
    Console.WriteLine(i + " " + timeStamp);

    string buffered = textToWrite.ToString();
    File.AppendAllText(@"e:\temp\query2.sql", buffered);

    textToWrite = new StringBuilder();
}

尽管您最好完全放弃缓冲:

// Open the target once in append mode and write every statement through the same writer.
using (StreamWriter writer = File.AppendText(filename))
{
    // initialization stuff here

    foreach (var line in lines)
    {
        // Quoted values end up at indexes 1, 3 and 5 after splitting on the single quote.
        string[] parts = line.Split('\'');
        string accountName = parts[1];
        string employeeId = parts[3];
        string platformId = parts[5];

        // Build the statement and write it as one line.
        writer.WriteLine(
            "INSERT [PreStaging].[Import_AccountEmployeeMapping]                     ([AccountName], [EmployeeID], [PlatformID]) VALUES (N" +
            "'" + accountName + "', "
            + employeeId + ", "
            + platformId + ")");
    }
}
于 2013-08-01T16:41:53.780 回答
2

一个好的开始是使用 .net 中内置的 StringBuilder 类。这将避免一堆字符串分配和复制。

请参阅有关其工作原理的 MSDN 文档:http://msdn.microsoft.com/en-us/library/system.text.stringbuilder.aspx

另请参阅此 Stackoverflow 帖子以获取更多信息:连接字符串的最有效方法?

例子:

// Collect the pieces in one builder and materialise the final string once at the end.
var builder = new StringBuilder();
builder.Append("some text")
       .Append("more text");
string result = builder.ToString();
于 2013-08-01T16:37:51.820 回答
1

什么版本的sql server?最好的方法不是使用一个巨大的 sql 脚本,而是使用表值参数或使用 sql 服务器批量复制支持。

于 2013-08-01T16:38:15.733 回答
1

最好的方法很可能是同时打开这两个文件,边读边写每一行,然后关闭文件。

但是,您最有可能遇到的最大问题是字符串连接。.NET 中的字符串是不可变的,因此每次连接都会分配一个新副本,这既需要时间也需要内存(尽管 GC 最终会返回后者)。

如果你把 textToWrite 换成 StringBuilder,并且只在最后调用一次 ToString(),你会看到更好的性能。

或者,老实说,您可能可以对整个事情进行一次正则表达式替换并完成它,尽管我相信您必须先将整个文件读入内存,就像您已经在做的那样。

于 2013-08-01T16:38:33.827 回答
0

MemoryMappedFiles是高效的,因此它们可能值得研究。

string[] lines = File.ReadAllLines(@"e:\temp\query.sql");

// A memory-mapped file has a fixed capacity, and the generated INSERT lines are longer
// than the SELECT lines they come from, so the input file's length is not a usable
// size. Format every statement first and add up the exact number of bytes needed.
var statements = new string[lines.Length];
int newLineBytes = Encoding.UTF8.GetByteCount(Environment.NewLine);
long capacity = 0;
for (int i = 0; i < lines.Length; i++)
{
    // Quoted values end up at indexes 1, 3 and 5 after splitting on the single quote.
    var bits = lines[i].Split('\'');

    var value1 = bits[1];
    var value2 = bits[3];
    var value3 = bits[5];

    statements[i] = string.Format(
        "INSERT [PreStaging].[Import_AccountEmployeeMapping] ([AccountName], [EmployeeID], [PlatformID]) VALUES (N'{0}', {1}, {2})",
        value1, value2, value3);

    capacity += Encoding.UTF8.GetByteCount(statements[i]) + newLineBytes;
}

if (capacity == 0)
{
    // Nothing to write: a mapping cannot be created with a capacity of zero over an
    // empty file, so just produce an empty output file.
    File.WriteAllText(@"e:\temp\query2.sql", string.Empty);
}
else
{
    using (var mmf = MemoryMappedFile.CreateFromFile(@"e:\temp\query2.sql", FileMode.Create, "txt", capacity))
    using (MemoryMappedViewStream mmvs = mmf.CreateViewStream(0, capacity))
    using (var writer = new StreamWriter(mmvs)) // UTF-8 without BOM, matching the byte count above
    {
        foreach (var statement in statements)
        {
            writer.WriteLine(statement);
        }
    } // disposing the writer flushes the buffered text into the mapped view
}

您可能会发现,先构建完整的文本,再一次性写入 MemoryMappedFile,性能会更好,因为对 ToString 的调用更少。

于 2013-08-01T17:09:53.133 回答