其他需要考虑的事情,特别是对于那些多年后发现这一点的人来说,取决于您的情况,通常最好在 DataTable 中收集所有数据,然后在每个主要任务结束时使用 SqlBulkCopy。
例如,我创建了一个处理数百万个文件的进程,当每个文件事务进行数据库查询以插入记录时,我遇到了相同的错误。相反,我将其全部存储在内存中的 DataTable 中,用于我迭代的每个共享,将 DataTable 转储到我的 SQL Server 并在每个单独的共享之间清除它。批量插入需要一秒钟的时间,并且不会一次打开数千个连接。
编辑:这是一个快速而肮脏的工作示例 SQLBulkCopy 方法:
private static void updateDatabase(DataTable targetTable)
{
try
{
DataSet ds = new DataSet("FileFolderAttribute");
ds.Tables.Add(targetTable);
writeToLog(targetTable.TableName + " - Rows: " + targetTable.Rows.Count, logDatabaseFile, getLineNumber(), getCurrentMethod(), true);
writeToLog(@"Opening SQL connection", logDatabaseFile, getLineNumber(), getCurrentMethod(), true);
Console.WriteLine(@"Opening SQL connection");
SqlConnection sqlConnection = new SqlConnection(sqlConnectionString);
sqlConnection.Open();
SqlBulkCopy bulkCopy = new SqlBulkCopy(sqlConnection, SqlBulkCopyOptions.TableLock | SqlBulkCopyOptions.FireTriggers | SqlBulkCopyOptions.UseInternalTransaction, null);
bulkCopy.DestinationTableName = "FileFolderAttribute";
writeToLog(@"Copying data to SQL Server table", logDatabaseFile, getLineNumber(), getCurrentMethod(), true);
Console.WriteLine(@"Copying data to SQL Server table");
foreach (var table in ds.Tables)
{
writeToLog(table.ToString(), logDatabaseFile, getLineNumber(), getCurrentMethod(), true);
Console.WriteLine(table.ToString());
}
bulkCopy.WriteToServer(ds.Tables[0]);
sqlConnection.Close();
sqlConnection.Dispose();
writeToLog(@"Closing SQL connection", logDatabaseFile, getLineNumber(), getCurrentMethod(), true);
writeToLog(@"Clearing local DataTable...", logDatabaseFile, getLineNumber(), getCurrentMethod(), true);
Console.WriteLine(@"Closing SQL connection");
Console.WriteLine(@"Clearing local DataTable...");
targetTable.Clear();
ds.Tables.Remove(targetTable);
ds.Clear();
ds.Dispose();
}
catch (Exception error)
{
errorLogging(error, getCurrentMethod(), logDatabaseFile);
}
}
...并将其转储到数据表中:
private static void writeToDataTable(string ServerHostname, string RootDirectory, string RecordType, string Path, string PathDirectory, string PathFileName, string PathFileExtension, decimal SizeBytes, decimal SizeMB, DateTime DateCreated, DateTime DateModified, DateTime DateLastAccessed, string Owner, int PathLength, DateTime RecordWriteDateTime)
{
try
{
if (tableToggle)
{
DataRow toInsert = results_1.NewRow();
toInsert[0] = ServerHostname;
toInsert[1] = RootDirectory;
toInsert[2] = RecordType;
toInsert[3] = Path;
toInsert[4] = PathDirectory;
toInsert[5] = PathFileName;
toInsert[6] = PathFileExtension;
toInsert[7] = SizeBytes;
toInsert[8] = SizeMB;
toInsert[9] = DateCreated;
toInsert[10] = DateModified;
toInsert[11] = DateLastAccessed;
toInsert[12] = Owner;
toInsert[13] = PathLength;
toInsert[14] = RecordWriteDateTime;
results_1.Rows.Add(toInsert);
}
else
{
DataRow toInsert = results_2.NewRow();
toInsert[0] = ServerHostname;
toInsert[1] = RootDirectory;
toInsert[2] = RecordType;
toInsert[3] = Path;
toInsert[4] = PathDirectory;
toInsert[5] = PathFileName;
toInsert[6] = PathFileExtension;
toInsert[7] = SizeBytes;
toInsert[8] = SizeMB;
toInsert[9] = DateCreated;
toInsert[10] = DateModified;
toInsert[11] = DateLastAccessed;
toInsert[12] = Owner;
toInsert[13] = PathLength;
toInsert[14] = RecordWriteDateTime;
results_2.Rows.Add(toInsert);
}
}
catch (Exception error)
{
errorLogging(error, getCurrentMethod(), logFile);
}
}
...这是上下文,循环片段本身:
private static void processTargetDirectory(DirectoryInfo rootDirectory, string targetPathRoot)
{
DateTime StartTime = DateTime.Now;
int directoryCount = 0;
int fileCount = 0;
try
{
manageDataTables();
Console.WriteLine(rootDirectory.FullName);
writeToLog(@"Working in Directory: " + rootDirectory.FullName, logFile, getLineNumber(), getCurrentMethod(), true);
applicationsDirectoryCount++;
// REPORT DIRECTORY INFO //
string directoryOwner = "";
try
{
directoryOwner = File.GetAccessControl(rootDirectory.FullName).GetOwner(typeof(System.Security.Principal.NTAccount)).ToString();
}
catch (Exception error)
{
//writeToLog("\t" + rootDirectory.FullName, logExceptionsFile, getLineNumber(), getCurrentMethod(), true);
writeToLog("[" + error.Message + "] - " + rootDirectory.FullName, logExceptionsFile, getLineNumber(), getCurrentMethod(), true);
errorLogging(error, getCurrentMethod(), logFile);
directoryOwner = "SeparatedUser";
}
writeToRawLog(serverHostname + "," + targetPathRoot + "," + "Directory" + "," + rootDirectory.Name + "," + rootDirectory.Extension + "," + 0 + "," + 0 + "," + rootDirectory.CreationTime + "," + rootDirectory.LastWriteTime + "," + rootDirectory.LastAccessTime + "," + directoryOwner + "," + rootDirectory.FullName.Length + "," + DateTime.Now + "," + rootDirectory.FullName + "," + "", logResultsFile, true, logFile);
//writeToDBLog(serverHostname, targetPathRoot, "Directory", rootDirectory.FullName, "", rootDirectory.Name, rootDirectory.Extension, 0, 0, rootDirectory.CreationTime, rootDirectory.LastWriteTime, rootDirectory.LastAccessTime, directoryOwner, rootDirectory.FullName.Length, DateTime.Now);
writeToDataTable(serverHostname, targetPathRoot, "Directory", rootDirectory.FullName, "", rootDirectory.Name, rootDirectory.Extension, 0, 0, rootDirectory.CreationTime, rootDirectory.LastWriteTime, rootDirectory.LastAccessTime, directoryOwner, rootDirectory.FullName.Length, DateTime.Now);
if (rootDirectory.GetDirectories().Length > 0)
{
Parallel.ForEach(rootDirectory.GetDirectories(), new ParallelOptions { MaxDegreeOfParallelism = directoryDegreeOfParallelism }, dir =>
{
directoryCount++;
Interlocked.Increment(ref threadCount);
processTargetDirectory(dir, targetPathRoot);
});
}
// REPORT FILE INFO //
Parallel.ForEach(rootDirectory.GetFiles(), new ParallelOptions { MaxDegreeOfParallelism = fileDegreeOfParallelism }, file =>
{
applicationsFileCount++;
fileCount++;
Interlocked.Increment(ref threadCount);
processTargetFile(file, targetPathRoot);
});
}
catch (Exception error)
{
writeToLog(error.Message, logExceptionsFile, getLineNumber(), getCurrentMethod(), true);
errorLogging(error, getCurrentMethod(), logFile);
}
finally
{
Interlocked.Decrement(ref threadCount);
}
DateTime EndTime = DateTime.Now;
writeToLog(@"Run time for " + rootDirectory.FullName + @" is: " + (EndTime - StartTime).ToString() + @" | File Count: " + fileCount + @", Directory Count: " + directoryCount, logTimingFile, getLineNumber(), getCurrentMethod(), true);
}
如上所述,这既快又脏,但效果很好。
对于我遇到大约 2,000,000 条记录后遇到的与内存相关的问题,我必须创建第二个 DataTable 并在这两个数据表之间交替,在交替之间将记录转储到 SQL 服务器。所以我的 SQL 连接由每 100,000 条记录中的 1 个组成。
我是这样管理的:
private static void manageDataTables()
{
try
{
Console.WriteLine(@"[Checking datatable size] toggleValue: " + tableToggle + " | " + @"r1: " + results_1.Rows.Count + " - " + @"r2: " + results_2.Rows.Count);
if (tableToggle)
{
int rowCount = 0;
if (results_1.Rows.Count > datatableRecordCountThreshhold)
{
tableToggle ^= true;
writeToLog(@"results_1 row count > 100000 @ " + results_1.Rows.Count, logDatabaseFile, getLineNumber(), getCurrentMethod(), true);
rowCount = results_1.Rows.Count;
logResultsFile = "FileServerReport_Results_" + DateTime.Now.ToString("yyyyMMdd-HHmmss") + ".txt";
Thread.Sleep(5000);
if (results_1.Rows.Count != rowCount)
{
writeToLog(@"results_1 row count increased, @ " + results_1.Rows.Count, logDatabaseFile, getLineNumber(), getCurrentMethod(), true);
rowCount = results_1.Rows.Count;
Thread.Sleep(15000);
}
writeToLog(@"results_1 row count stopped increasing, updating database...", logDatabaseFile, getLineNumber(), getCurrentMethod(), true);
updateDatabase(results_1);
results_1.Clear();
writeToLog(@"results_1 cleared, count: " + results_1.Rows.Count, logDatabaseFile, getLineNumber(), getCurrentMethod(), true);
}
}
else
{
int rowCount = 0;
if (results_2.Rows.Count > datatableRecordCountThreshhold)
{
tableToggle ^= true;
writeToLog(@"results_2 row count > 100000 @ " + results_2.Rows.Count, logDatabaseFile, getLineNumber(), getCurrentMethod(), true);
rowCount = results_2.Rows.Count;
logResultsFile = "FileServerReport_Results_" + DateTime.Now.ToString("yyyyMMdd-HHmmss") + ".txt";
Thread.Sleep(5000);
if (results_2.Rows.Count != rowCount)
{
writeToLog(@"results_2 row count increased, @ " + results_2.Rows.Count, logDatabaseFile, getLineNumber(), getCurrentMethod(), true);
rowCount = results_2.Rows.Count;
Thread.Sleep(15000);
}
writeToLog(@"results_2 row count stopped increasing, updating database...", logDatabaseFile, getLineNumber(), getCurrentMethod(), true);
updateDatabase(results_2);
results_2.Clear();
writeToLog(@"results_2 cleared, count: " + results_2.Rows.Count, logDatabaseFile, getLineNumber(), getCurrentMethod(), true);
}
}
}
catch (Exception error)
{
errorLogging(error, getCurrentMethod(), logDatabaseFile);
}
}
其中“datatableRecordCountThreshhold = 100000”