当文件很大(比我的 RAM 大得多)时,我用下面的方法来删除文件末尾的空字节(值为 0x00 的字节):
/// <summary>
/// Copies <paramref name="inputFilename"/> to <paramref name="outputFilename"/>, leaving out the
/// run of zero (0x00) bytes at the end of the input. The input is scanned in fixed-size chunks,
/// so it never has to fit in memory.
/// </summary>
/// <param name="inputFilename">Path of the file to read.</param>
/// <param name="outputFilename">Path of the file to create; opened with <c>FileMode.Create</c>.</param>
static void RemoveTrailingNulls(string inputFilename, string outputFilename)
{
    const int bufferSize = 100 * 1024 * 1024;
    long totalTrailingNulls = 0;

    using (var inputFile = File.OpenRead(inputFilename))
    using (var inputFileReversed = new ReverseStream(inputFile))
    {
        // NOTE(review): assumes ReverseStream.Read yields the underlying file's bytes
        // last-to-first and returns 0 once the start of the file is reached - confirm.
        var buffer = new byte[bufferSize];
        while (true)
        {
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();
            var bytesRead = inputFileReversed.Read(buffer, 0, buffer.Length);
            if (bytesRead == 0)
            {
                // Nothing left to read: the file is empty or consists only of nulls.
                // Without this check the loop would never terminate.
                break;
            }

            // Count the zero bytes at the front of this chunk with a plain index loop;
            // this avoids per-byte LINQ overhead and a second all-zero comparison buffer.
            int nulls = 0;
            while (nulls < bytesRead && buffer[nulls] == 0)
            {
                nulls++;
            }

            totalTrailingNulls += nulls;
            if (nulls < bytesRead)
            {
                // Found the last non-null byte.
                break;
            }

            var mbPerSec = (bytesRead / (1024 * 1024D)) / stopwatch.Elapsed.TotalSeconds;
            Console.WriteLine($"{mbPerSec:N2} MB/seconds");
        }

        // Number of leading bytes of the input that must be kept.
        var bytesToKeep = inputFile.Length - totalTrailingNulls;
        using (var outputFile = File.Open(outputFilename, FileMode.Create, FileAccess.Write))
        {
            inputFile.Seek(0, SeekOrigin.Begin);
            inputFile.CopyTo(outputFile, bytesToKeep, bufferSize);
        }
    }
}
它使用 ReverseStream 类,可在此处找到。
而这个扩展方法:
/// <summary>
/// Stream helper extensions.
/// </summary>
public static class Extensions
{
    /// <summary>
    /// Copies up to <paramref name="count"/> bytes from <paramref name="input"/> to
    /// <paramref name="output"/>, moving at most <paramref name="bufferSize"/> bytes per read.
    /// </summary>
    /// <param name="input">Stream to read from, starting at its current position.</param>
    /// <param name="output">Stream to write to.</param>
    /// <param name="count">Maximum number of bytes to copy.</param>
    /// <param name="bufferSize">Size, in bytes, of the intermediate buffer.</param>
    /// <returns>
    /// The number of bytes actually copied; less than <paramref name="count"/> when
    /// <paramref name="input"/> runs out of data first.
    /// </returns>
    public static long CopyTo(this Stream input, Stream output, long count, int bufferSize)
    {
        var chunk = new byte[bufferSize];
        long copied = 0;

        for (long remaining = count; remaining != 0; )
        {
            // Never ask for more than the caller still wants.
            int wanted = (int)Math.Min(bufferSize, remaining);
            int got = input.Read(chunk, 0, wanted);
            if (got == 0)
            {
                // Read returned no data: stop copying.
                break;
            }

            copied += got;
            output.Write(chunk, 0, got);
            remaining -= got;
        }

        return copied;
    }
}