我正在尝试构建一个简单的爬虫,但似乎所有线程都没有完成,即使队列是空的:
#!/usr/bin/perl
use strict;
use warnings;
use threads;
use Thread::Queue;
use LWP::UserAgent;
use HTML::LinkExtor;
my $ua = new LWP::UserAgent;
my %visited = ();
my $workQueue = new Thread::Queue;
sub work {
my ($ua, $queue, $hashref) = @_;
my $tid = threads->self->tid;
my $linkExtor = new HTML::LinkExtor;
while (my $next = $queue->dequeue)
{
print "Processin ($tid): ", $next, "\n";
my $resp = $ua->get ($next);
if ($resp->is_success)
{
$linkExtor->parse ($resp->content);
my @links = map { my($tag, %attrs) = @$_;
($tag eq 'a')
? $attrs{href} : () } $linkExtor->links;
$queue->enqueue (@links);
}
}
};
$workQueue->enqueue ('http://localhost');
my @threads = map { threads->create (\&work, $ua, $workQueue, \%visited) } 1..10;
$_->join for @threads;
那么等待这些线程完成的正确方法是什么?它永远不会跳出那个while循环。