我目前正在学习使用 Akka 构建分布式软件,正在尝试编写的程序是一个网页抓取程序。我面临的主要挑战是需要控制同时(simultaneous)发送到同一主机的请求数量,以免被网站封禁。由于我以前从未在任何实际规模上使用过 Akka,我非常希望有人能帮我看看我的设计:它是合理的,还是完全没有抓住重点?
我的设计思路是为每个主机创建一个路由器(router),其中 routee 的数量等于我允许同时访问该站点的请求数;再由一个协调者 actor(coordinator)把请求转发给对应主机的路由器。
由于我的 Coordinator 是一个普通的 actor,它一次只能路由一个请求。有没有办法让它像路由器一样同时路由多个请求,从而不会成为瓶颈?
下面是用代码表达的设计思路:
/// <summary>
/// Coordinator actor that forwards each <see cref="HttpRequestMessage"/> to a
/// router dedicated to the request's host. One router is created lazily per
/// host, so the size of that router's pool bounds how many downloader actors
/// exist for the host.
/// </summary>
public class HttpRequestCordinator : ReceiveActor
{
    // Host name -> router for that host. The comparer is ordinal and
    // case-insensitive so that "Example.com" and "example.com" always share
    // one router regardless of the current thread culture (a culture-sensitive
    // ToLower() can produce different keys for the same host).
    private readonly Dictionary<string, IActorRef> hostDownloader;

    /// <summary>
    /// Creates the coordinator and registers the handler for
    /// <see cref="HttpRequestMessage"/>.
    /// </summary>
    public HttpRequestCordinator()
    {
        hostDownloader = new Dictionary<string, IActorRef>(StringComparer.OrdinalIgnoreCase);
        this.Receive<HttpRequestMessage>(r =>
        {
            this.OnHttpRequesetMessage(r);
        });
    }

    /// <summary>
    /// Looks up (or creates) the router for the message's host and forwards the
    /// message to it, preserving the original sender so the downloader replies
    /// directly to the requester.
    /// </summary>
    /// <param name="message">The request to route; its Address.Host selects the router.</param>
    private void OnHttpRequesetMessage(HttpRequestMessage message)
    {
        var host = message.Address.Host;

        IActorRef downloader;
        if (!hostDownloader.TryGetValue(host, out downloader))
        {
            // Round-robin pool starting with 1 routee; the resizer is
            // configured with bounds 0 and 10.
            downloader = Context.ActorOf(
                Props.Create(() => new HttpRequestActor())
                     .WithRouter(new RoundRobinPool(1, new DefaultResizer(0, 10))));
            hostDownloader.Add(host, downloader);
        }

        downloader.Tell(message, Sender);
    }
}
/// <summary>
/// Downloader actor: executes one HTTP request per received
/// <see cref="Messages.HttpRequestMessage"/> and replies to the sender with
/// either the <see cref="HttpResponseMessage"/> or an
/// <see cref="HttpRequsetFailed"/> wrapping the original message.
/// </summary>
public class HttpRequestActor : ReceiveActor
{
    // One client per actor instance instead of one per request: creating and
    // disposing an HttpClient for every request churns sockets/connections.
    // Disposed in PostStop (which also disposes the handler it owns).
    private readonly HttpClient httpClient;

    /// <summary>
    /// Creates the shared HTTP client and registers the asynchronous handler.
    /// </summary>
    public HttpRequestActor()
    {
        httpClient = new HttpClient(this.GetHandler());
        AddDefaultHeadersToClient(httpClient);

        // ReceiveAsync (rather than Receive with an async lambda) makes the
        // actor wait for the returned task before taking the next message, so
        // each routee has at most one request in flight. With Receive, the
        // async lambda becomes "async void": the actor would keep dequeuing
        // while requests are pending and the code after the await would run
        // without an actor context.
        ReceiveAsync<Messages.HttpRequestMessage>(async r =>
        {
            // Capture before awaiting so the reply target cannot change.
            var replyTo = Sender;
            try
            {
                using (var cancellation = new CancellationTokenSource())
                {
                    // Per-request timeout taken from the message.
                    cancellation.CancelAfter(r.TimeOut);
                    var result = await httpClient.SendAsync(r.Message, cancellation.Token);
                    replyTo.Tell(result);
                }
            }
            catch (Exception)
            {
                // Any failure (including timeout cancellation) is reported to
                // the requester instead of crashing the actor.
                replyTo.Tell(new HttpRequsetFailed(r));
            }
        });
    }

    /// <summary>
    /// Releases the HTTP client when the actor stops.
    /// </summary>
    protected override void PostStop()
    {
        httpClient.Dispose();
        base.PostStop();
    }

    /// <summary>
    /// Builds the message handler: cookies disabled, gzip/deflate responses
    /// decompressed automatically.
    /// </summary>
    /// <returns>A new, caller-owned <see cref="HttpClientHandler"/>.</returns>
    private HttpClientHandler GetHandler()
    {
        return new HttpClientHandler()
        {
            UseCookies = false,
            AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate
        };
    }

    /// <summary>
    /// Adds the default request headers sent with every request.
    /// </summary>
    /// <param name="client">The client whose default headers are populated.</param>
    private void AddDefaultHeadersToClient(HttpClient client)
    {
        client.DefaultRequestHeaders.Add("Accept", "*/*");
        client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip, deflate");
        client.DefaultRequestHeaders.Add("Accept-Language", "da-DK,da;q=0.8,en-US;q=0.6,en;q=0.4");
        client.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.115 Safari/537.36");
        // The HTTP header name is "Accept-Charset"; "AcceptCharset" would be
        // sent as an unrelated custom header.
        client.DefaultRequestHeaders.Add("Accept-Charset", "utf-8");
    }
}