第一个也是最好的建议:基准测试!
不要把你得到的任何建议(包括我的)当作确定的事实。性能会因你的操作系统、硬件和 PHP 版本而异。
下面的做法应该比较快,并且已经直接内置了一个微基准测试。请实际测试一下,并把结果告诉我们。
<?php
// Wall-clock start time as float seconds; subtracted from a second
// microtime(true) reading at the end of the script to report the elapsed time.
$start = microtime(true);
/**
 * Opens a file located next to this script in binary mode and takes an
 * advisory lock on it.
 *
 * A read-only handle (mode "r") gets a shared lock, so several readers may
 * coexist; every other mode gets an exclusive lock.
 *
 * @param string $file File name relative to this script's directory.
 * @param string $mode fopen() mode without the "b" flag, e.g. "r" or "w".
 *
 * @return resource|false The locked handle. False is only returned when the
 *                        open failed and an error handler let execution continue.
 */
function get_file_handle($file, $mode) {
    $path = __DIR__ . DIRECTORY_SEPARATOR . $file;

    // A "w" open truncates the file before any lock can be taken. Open such
    // modes with "c" instead (create, do not truncate) and truncate only once
    // the exclusive lock is held.
    $truncate = (strncmp($mode, "w", 1) === 0);
    $open_mode = $truncate ? "c" . (string) substr($mode, 1) : $mode;

    $h = fopen($path, "{$open_mode}b");
    if (!$h) {
        trigger_error("Could not open {$file}.", E_USER_ERROR);
        // Only reached when an error handler swallowed the error above.
        return false;
    }

    // The lock operations are not bit flags: LOCK_SH | LOCK_EX is 1 | 2 === 3,
    // which is LOCK_UN. Pick exactly one operation based on the mode.
    $operation = ($mode === "r") ? LOCK_SH : LOCK_EX;
    if (flock($h, $operation) === false) {
        trigger_error("Could not acquire lock for {$file}", E_USER_ERROR);
    }

    if ($truncate && ftruncate($h, 0) === false) {
        trigger_error("Could not truncate {$file}.", E_USER_ERROR);
    }

    return $h;
}
// Both inputs are only read, never written.
$input_handle1 = get_file_handle("input1", "r");
$input_handle2 = get_file_handle("input2", "r");
// The output is only written, never read back.
$output_handle = get_file_handle("output", "w");
// Walk both inputs in lock-step, taking one line from each per iteration.
// NOTE: This only works if lines are always corresponding in both files.
while (true) {
    $line1 = fgets($input_handle1);
    $line2 = fgets($input_handle2);
    if ($line1 === false || $line2 === false) {
        // One side is false and the other still holds a line: the inputs have
        // a different number of lines, so say so instead of silently stopping.
        if ($line1 !== $line2) {
            trigger_error("Input files have a different number of lines, stopping early.", E_USER_WARNING);
        }
        break;
    }
    // Drop the line terminator so the last column does not carry a trailing
    // newline into the arithmetic below.
    $buffer1 = explode("\t", rtrim($line1, "\r\n"));
    $buffer2 = explode("\t", rtrim($line2, "\r\n"));
    // Columns 0-3 are used below; skip rows that do not have them.
    if (count($buffer1) < 4 || count($buffer2) < 4) {
        trigger_error("Skipping line with fewer than four tab-separated columns.", E_USER_WARNING);
        continue;
    }
    // Forget floatval, let PHP do its dynamic casting.
    // NOTE: If precision is important use e.g. bcmath!
    $a1 = $buffer1[2] * $buffer1[3];
    $a2 = $buffer2[2] * $buffer2[3];
    $ansnp = $buffer1[2] - $buffer2[2];
    // Equal third columns give a zero denominator. Write NAN for that row
    // rather than dividing by zero, so the output keeps one row per input row.
    $a3 = ($ansnp == 0) ? NAN : round(($a1 - $a2) / $ansnp, 3);
    if (fwrite($output_handle, "{$buffer1[0]}\t{$buffer1[1]}\t{$ansnp}\t{$a3}\r\n") === false) {
        trigger_error("Could not write result to output file.", E_USER_ERROR);
    }
}
// Release locks on and close all file handles.
foreach (array($input_handle1, $input_handle2, $output_handle) as $handle) {
    if (flock($handle, LOCK_UN) === false) {
        trigger_error("Could not release lock!", E_USER_ERROR);
    }
    if (fclose($handle) === false) {
        trigger_error("Could not close file handle!", E_USER_ERROR);
    }
}
echo "Finished processing after " , (microtime(true) - $start) , PHP_EOL;
当然,这也可以用面向对象(OO)的方式来实现,并配合异常(exception)等机制。
行缓冲
// Determines how many lines to buffer between each calculation/write.
$lines_to_buffer = 1000;
while (!feof($input_handle1) && !feof($input_handle2)) {
    // Parsed rows of the current batch, keyed 1 and 2 like the handles below.
    $batches = array(1 => array(), 2 => array());
    // Read a batch from the first handle, then a batch from the second handle.
    // NOTE: Reading multiple lines from the same file in a row allows us to make best use of the hard disk, if it isn't
    // an SSD, since we consecutively read from the same location which yields minimum seeks. But also keep in mind that
    // this might not be true if multiple processes are running in parallel, since they might read from different files
    // at the same time.
    foreach (array(1 => $input_handle1, 2 => $input_handle2) as $key => $handle) {
        while (($line = fgets($handle)) !== false) {
            // Drop the line terminator so the last column does not carry a
            // trailing newline into the arithmetic below.
            $batches[$key][] = explode("\t", rtrim($line, "\r\n"));
            // Break if we read enough lines.
            if (count($batches[$key]) === $lines_to_buffer) {
                break;
            }
        }
    }
    $c1 = count($batches[1]);
    $c2 = count($batches[2]);
    // Both inputs must deliver the same number of lines per batch.
    if ($c1 !== $c2) {
        trigger_error("Lines from input files differ, aborting.", E_USER_ERROR);
    }
    // Start every batch with an empty string so the first append is defined.
    $result = "";
    // Only iterate over the rows actually read; the final batch is usually
    // shorter than $lines_to_buffer (and may be empty).
    for ($i = 0; $i < $c1; ++$i) {
        $row1 = $batches[1][$i];
        $row2 = $batches[2][$i];
        // Columns 0-3 are used below; skip rows that do not have them.
        if (count($row1) < 4 || count($row2) < 4) {
            trigger_error("Skipping line with fewer than four tab-separated columns.", E_USER_WARNING);
            continue;
        }
        $a1 = $row1[2] * $row1[3];
        $a2 = $row2[2] * $row2[3];
        $ansnp = $row1[2] - $row2[2];
        // Equal third columns give a zero denominator. Write NAN for that row
        // rather than dividing by zero, so the output keeps one row per input row.
        $a3 = ($ansnp == 0) ? NAN : round(($a1 - $a2) / $ansnp, 3);
        // Use the current row's first two columns, not the whole row arrays.
        $result .= "{$row1[0]}\t{$row1[1]}\t{$ansnp}\t{$a3}\r\n";
    }
    if ($result !== "" && fwrite($output_handle, $result) === false) {
        trigger_error("Could not write result to output file.", E_USER_ERROR);
    }
    // Release the batch memory before reading the next one.
    $result = $batches = null;
}