我已经重写了 dkroy 的算法,因为我认为它进行了过多的强制转换并且过于频繁地查询磁盘。
现在,它会一次性加载所有文件信息,之后不再访问磁盘,直到需要写入 zip 文件为止。我还优化了比较逻辑:比较完全在内存中完成,并且使用引用而不是在每次迭代时创建新对象;另外还把一些变量改成了标准 .NET 类型,并移除了 StringBuilder。代码如下:
/// <summary>
/// Packs matching <c>.pdf</c>/<c>.idf</c> file pairs found in <paramref name="largeDir"/>
/// into a sequence of zip archives written to <paramref name="splitIntoDir"/>.
/// Archives are named <c>{source folder name}_{n}.zip</c>, with <c>n</c> starting at 1.
/// </summary>
/// <param name="largeDir">Directory that contains the PDF and IDF files to pack.</param>
/// <param name="splitIntoDir">Directory that receives the generated zip files.</param>
/// <param name="maxFolderSize">
/// Limit, in bytes, on the summed (uncompressed) size of the files placed in one archive.
/// A new archive is started once adding the next pair would reach or exceed this value.
/// A single pair that is itself at or over the limit is still written, alone, to its own archive.
/// </param>
private void CreateZip(string largeDir, string splitIntoDir, double maxFolderSize)
{
    // Read all file metadata up front so the disk is not queried again
    // until the archives are actually written.
    DirectoryInfo sourceDir = new DirectoryInfo(largeDir);
    FileInfo[] files = sourceDir.GetFiles("*.pdf");
    FileInfo[] filesPair = sourceDir.GetFiles("*.idf");

    // Match PDFs and IDFs in memory by file name without extension;
    // files without a counterpart are left out by the join.
    var pairs = files.Join(
        filesPair,
        f => Path.GetFileNameWithoutExtension(f.FullName),
        idx => Path.GetFileNameWithoutExtension(idx.FullName),
        (f, idx) => new { Pdf = f, Index = idx });

    // Trim trailing separators first: Path.GetFileName returns an empty string
    // for a path such as "C:\data\", which would yield archives named "_1.zip".
    string baseName = Path.GetFileName(
        largeDir.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar));

    List<FileInfo> toAdd = new List<FileInfo>();
    long currentOutputSize = 0;
    int fileNumber = 1;

    foreach (var pair in pairs)
    {
        long pairSize = pair.Pdf.Length + pair.Index.Length;

        // Flush the pending batch when this pair would push it to the limit.
        // The Count check avoids saving an empty archive when a single pair
        // is already at or over the limit on its own.
        if (toAdd.Count > 0 && currentOutputSize + pairSize >= maxFolderSize)
        {
            WriteZipBatch(toAdd, splitIntoDir, baseName, fileNumber);
            toAdd.Clear();
            fileNumber++;
            // The new archive starts empty, so the running total restarts too.
            currentOutputSize = 0;
        }

        toAdd.Add(pair.Pdf);
        toAdd.Add(pair.Index);
        currentOutputSize += pairSize;
    }

    // Write whatever is left after the last pair; without this the final
    // batch would never reach disk.
    if (toAdd.Count > 0)
    {
        WriteZipBatch(toAdd, splitIntoDir, baseName, fileNumber);
    }
}

/// <summary>
/// Writes one batch of files to <c>{splitIntoDir}{separator}{baseName}_{fileNumber}.zip</c>,
/// placing every file at the root of the archive.
/// </summary>
private static void WriteZipBatch(List<FileInfo> batch, string splitIntoDir, string baseName, int fileNumber)
{
    string outputZip = string.Format("{0}{1}{2}_{3}.zip", splitIntoDir, Path.DirectorySeparatorChar, baseName, fileNumber);
    using (ZipFile zip = new ZipFile(outputZip))
    {
        foreach (FileInfo file in batch)
        {
            // An empty directory path stores the entry at the archive root.
            zip.AddFile(file.FullName, string.Empty);
        }
        zip.Save();
    }
}