正如您所猜测的那样,很难判断您是否可以通过使用线程来看到任何收益......
不过,我还是决定按照您的思路编写一个 pthreads 示例,我认为它很好地展示了使用线程时应当遵循的做法......
实际效果会因环境而异,但无论如何,示例如下:
<?php
/* a single mutex, published as the LOG constant, used to serialise writes to stdout */
define('LOG', Mutex::create());
/*
 Thread safe printf: writes a formatted message to stdout.

 The first argument passed is the format string and every further argument
 is a value for it; the declared parameters only document that shape, the
 real argument list is read with func_get_args(). The LOG mutex is held
 while echoing. Calling with no arguments prints nothing.
*/
function out($message, $format = null) {
    $arguments = func_get_args();
    if (!$arguments) {
        return;
    }

    /* peel the format string off the front, the rest are its values */
    $template = array_shift($arguments);

    Mutex::lock(LOG);
    echo vsprintf($template, $arguments);
    Mutex::unlock(LOG);
}
/*
 Sums is the collection shared among workers, mapping a checksum to the
 file that produced it. It carries no logic of its own: run() is an empty
 implementation.
*/
class Sums extends Stackable
{
    public function run()
    {
        /* intentionally empty */
    }
}
/*
 Worker thread that executes the checksum jobs stacked onto it; its own
 run() does nothing.
*/
class CheckWorker extends Worker
{
    public function run()
    {
        /* intentionally empty */
    }
}
/*
 The simplest version of a job that calculates the checksum of a file.

 The md5 of the file is looked up in the shared Sums collection: when it is
 already present the file is reported as a duplicate of the recorded file,
 otherwise the file is recorded under its checksum and reported as unique.
*/
class Check extends Stackable {
    /* all properties are public */
    public $file;
    /* md5 of $file, set by run() once the file has been read successfully */
    public $sum;
    /* shared sum => file collection */
    public $sums;

    /*
     Accept a file and Sums collection.

     @param string $file path of the file to checksum
     @param Sums   $sums collection of sum => file shared among workers
    */
    public function __construct($file, Sums &$sums) {
        $this->file = $file;
        $this->sums = $sums;
    }

    /* calculate the checksum and report the file as unique or duplicate */
    public function run() {
        out("checking: %s\n", $this->file);

        /*
         md5_file() returns false when the file cannot be read; using that
         as a key would make every unreadable file look like a duplicate of
         the previous unreadable one, so report it and stop here
        */
        $sum = md5_file($this->file);
        if ($sum === false) {
            out("unable to read: %s\n", $this->file);
            return;
        }
        $this->sum = $sum;

        /*
         NOTE(review): the lookup and the write below are two separate
         operations on the shared collection; two workers hashing identical
         files at the same moment could presumably both report "unique" -
         confirm and guard the pair if exact results matter
        */
        if (isset($this->sums[$sum])) {
            /* deal with duplicate */
            out(
                "duplicate file found: %s, duplicate of %s\n",
                $this->file,
                $this->sums[$sum]
            );
            return;
        }

        /* set sum in shared list */
        $this->sums[$sum] = $this->file;
        /* output some info ... */
        out("unique file found: %s, sum (%s)\n", $this->file, $sum);
    }
}
/*
 Driver: checksums every regular file directly inside the directory given
 as the first command line argument, spreading the work over a pool of
 worker threads, then reports the elapsed time.

 usage: php <script> <directory> [workers]
*/

/* start a timer */
$start = microtime(true);

/* refuse to run without a directory to scan, before any thread is started */
if (!isset($argv[1]) || !is_dir($argv[1])) {
    out("usage: php %s <directory> [workers]\n", basename(__FILE__));
    Mutex::destroy(LOG);
    exit(1);
}

/* checksum collection, shared across all threads */
$sums = new Sums();

/*
 how many worker threads you want depends on your hardware: an optional
 second command line argument overrides the default of 16 (minimum 1)
*/
$limit = isset($argv[2]) ? max(1, (int) $argv[2]) : 16;

/* create and start a suitable amount of worker threads */
$workers = array();
for ($worker = 0; $worker < $limit; $worker++) {
    $workers[$worker] = new CheckWorker();
    $workers[$worker]->start();
}

/* scan path given on command line for files */
$entries = scandir($argv[1]);
if ($entries === false) {
    /* nothing to check: report it and fall through to a clean shutdown */
    out("unable to scan: %s\n", $argv[1]);
    $entries = array();
}

/*
 every job is also stored in $checks - presumably so the objects stay
 referenced in this thread while the workers execute them (TODO confirm)
*/
$checks = array();
$next = 0;
foreach ($entries as $id => $entry) {
    /* @TODO(u) write code to recursively scan a path */
    $path = sprintf("%s/%s", $argv[1], $entry);

    /* only files get a job, directories are skipped */
    if (is_dir($path)) {
        continue;
    }

    /* create a job to calculate the checksum of a file */
    $checks[$id] = new Check($path, $sums);

    /* hand jobs out round-robin so that every worker gets an even share */
    $workers[$next % $limit]->stack($checks[$id]);
    $next++;
}

/* join threads */
foreach ($workers as $thread) {
    $thread->shutdown();
}

/* output some info */
out("complete in %.3f seconds\n", microtime(true) - $start);

/* destroy logging mutex */
Mutex::destroy(LOG);
?>
试着运行一下,看看不同数量的工作线程(worker)对运行时间有何影响,然后自行实现删除文件和递归扫描目录的逻辑(这些是您应该已经掌握的基础内容,为了保持示例简洁而省略了)......