1

我正在尝试编译类似以下代码的内容。看起来我需要帮助编译器理解:我希望所有匹配臂都被视为 futures::future::IntoFuture,因为这是外层 and_then 对回调/闭包/委托的期望。

目前,所有分支都使用最简单的枚举变体 NothingUseful() 作为占位(stub),但我的最终目标是根据返回的 HTTP 状态码和/或响应正文内容(如果适用)采取不同的操作。

extern crate futures;
extern crate hyper;
extern crate tokio_core;

use futures::{future, Future, Stream};
use hyper::{Client, Error as HyperError, Response, StatusCode, Uri};
use tokio_core::reactor::Core;

struct RecurseUrl { // one follow-up URL and its attempt budget; never constructed in this listing
    uri: Uri, // address to request
    remaining_attempts: u8, // presumably a retry/redirect budget — nothing here reads it yet
}

enum FetchResult { // outcome produced after looking at one HTTP response
    SimpleData(u16), // numeric payload; what the u16 means is not shown in this listing
    RecurseUrls(Vec<RecurseUrl>), // presumably further URLs to fetch
    NothingUseful(), // placeholder: the only variant any branch in this listing produces
}

fn handle_redirect(res: &Response) -> future::FutureResult<FetchResult, HyperError> {
    future::result(Ok(FetchResult::NothingUseful())) // stub: `res` is ignored, resolves immediately
}

fn main() { // NOTE: does not compile as written (E0308) — see the compiler output after this listing
    let url = "http://someurl.com"
        .parse()
        .expect("Unable to parse URL");

    let mut core = Core::new().expect("Unable to instantiate Tokio Core");
    let client = Client::new(&core.handle());

    let work = client.get(url).and_then(|res| { // the closure must return ONE type implementing IntoFuture

        match res.status() {
            StatusCode::TemporaryRedirect => handle_redirect(&res), // FutureResult<FetchResult, HyperError>
            StatusCode::PermanentRedirect => handle_redirect(&res), // same type as the arm above
            StatusCode::Ok => {
                res.body().concat2().and_then(move |body| { // AndThen<Concat2<Body>, ..>: a different type
                    Ok(FetchResult::NothingUseful())
                })
            },
            _ => {
                Ok(FetchResult::NothingUseful()) // a bare Result: yet another type
            }
        }
    });

    core.run(work).expect("Problem running work"); // runs `work` on the reactor; panics if it fails
}
    error[E0308]: match arms have incompatible types
  --> main.rs:34:13
   |
34 | /             match res.status() {
35 | |                 StatusCode::TemporaryRedirect => handle_redirect(&res),
36 | |                 StatusCode::PermanentRedirect => handle_redirect(&res),
37 | |                 StatusCode::Ok => {
...  |
44 | |                 }
45 | |             }
   | |_____________^ expected struct `futures::FutureResult`, found struct `futures::AndThen`
   |
   = note: expected type `futures::FutureResult<FetchResult, hyper::Error>`
              found type `futures::AndThen<futures::stream::Concat2<hyper::Body>, std::result::Result<FetchResult, hyper::Error>, [closure@main.rs:38:51: 40:22]>`
note: match arm with an incompatible type
  --> main.rs:37:35
   |
37 |                   StatusCode::Ok => {
   |  ___________________________________^
38 | |                     res.body().concat2().and_then(move |body| {
39 | |                         Ok(FetchResult::NothingUseful())
40 | |                     })
41 | |                 },
   | |_________________^
4

1 回答 1

2

我希望所有匹配臂都被视为 futures::future::IntoFuture,因为这是外层 and_then 对回调/闭包/委托的期望。

and_then 期望闭包的返回类型是实现了 IntoFuture trait 的单个具体类型。你的 match 返回了多个具体类型——这在 Rust 中是不允许的,因为编译器不知道要分配多少栈空间。

您需要将所有各种类型转换为一个统一的类型。最简单的方法是将它们全部装箱,创建一个特征对象 ( Box<Future<Item = FetchResult, Error = hyper::Error>>):

// Box every arm so that the closure returns a single concrete type:
// a boxed `Future` trait object.
let work = client.get(url).and_then(
    |res| -> Box<Future<Item = FetchResult, Error = hyper::Error>> {
        match res.status() {
            // Both redirect flavours go through the same handler.
            StatusCode::TemporaryRedirect | StatusCode::PermanentRedirect => {
                Box::new(handle_redirect(&res))
            }
            // Collect the whole body, then turn it into a result
            // (the body itself is ignored for now).
            StatusCode::Ok => {
                let whole_body = res.body().concat2();
                Box::new(whole_body.map(move |body| FetchResult::NothingUseful()))
            }
            // Any other status: an already-completed future.
            _ => Box::new(future::ok(FetchResult::NothingUseful())),
        }
    },
);

您还可以创建自己的类型,并为它实现 Future。这样可以避免任何分配:

#[macro_use]
extern crate futures;
extern crate hyper;
extern crate tokio_core;

use futures::{Async, Future, Poll};
use hyper::client::{FutureResponse, HttpConnector};
use hyper::{Client, Response, StatusCode, Uri};
use tokio_core::reactor::Core;

/// A future that fetches a URI and re-issues the request for as long as the
/// server answers with a redirect, within a limited number of attempts.
struct RecurseUrl {
    /// Client used for the initial request and for every follow-up request.
    client: Client<HttpConnector>,
    /// The request that is currently in flight.
    future: FutureResponse,
    /// How many more redirects may still be followed.
    remaining_attempts: u8,
}

impl RecurseUrl {
    fn new(client: Client<HttpConnector>, uri: Uri) -> Self {
        let future = client.get(uri);
        Self {
            client,
            future,
            remaining_attempts: 3,
        }
    }
}

impl Future for RecurseUrl {
    type Item = hyper::Response;
    type Error = hyper::Error;

    /// Drives the in-flight request and transparently follows redirects.
    ///
    /// Resolves to the first response with status `Ok`. A temporary or
    /// permanent redirect replaces the in-flight request with one for the
    /// redirect target and uses up one remaining attempt.
    ///
    /// # Panics
    ///
    /// Panics when a redirect arrives with no attempts left, or when the
    /// status is neither `Ok` nor a redirect. Both are placeholders that are
    /// meant to be replaced with a real error value.
    fn poll(&mut self) -> Poll<Self::Item, Self::Error> {
        loop {
            // Returns early with `NotReady` (wake-up already registered by
            // the inner future) or with the error, if there is one.
            let response = try_ready!(self.future.poll());

            match response.status() {
                StatusCode::TemporaryRedirect | StatusCode::PermanentRedirect => {
                    if self.remaining_attempts == 0 {
                        panic!("return a real error")
                    }

                    let next_uri = get_redirect_uri_from_response(&response);
                    let next_future = self.client.get(next_uri);
                    self.future = next_future;
                    self.remaining_attempts -= 1;

                    // Do NOT return `NotReady` here: the replacement future
                    // has never been polled, so nothing has arranged for the
                    // current task to be woken and it would stall. Go around
                    // the loop and poll the new request right away instead.
                }
                StatusCode::Ok => return Ok(Async::Ready(response)),
                _ => panic!("return a real error"),
            }
        }
    }
}

/// Returns the URI that a redirect response points to.
///
/// Left as a stub in this sketch: calling it panics via `unimplemented!()`.
/// NOTE(review): a real implementation would presumably read the `Location`
/// header of `_response` — confirm against the hyper version in use.
fn get_redirect_uri_from_response(_response: &Response) -> Uri {
    unimplemented!()
}

fn main() {
    let uri = "http://someurl.com".parse().expect("Unable to parse URL");

    let mut core = Core::new().expect("Unable to instantiate Tokio Core");
    let client = Client::new(&core.handle());

    let work = RecurseUrl::new(client, uri);
    core.run(work).expect("Problem running work");
}

然后,您可以使用以下内容处理从响应正文中读取 URI:

use futures::stream::{Stream, FuturesUnordered, Concat2};

/// A stream that starts from one URI, yields every fetched body, and queues
/// further requests for the URIs found in each body.
struct WebCrawler {
    /// Client used to issue every request.
    client: Client<HttpConnector>,
    /// Requests that are still waiting for their response.
    to_fetch: FuturesUnordered<FutureResponse>,
    /// Responses whose bodies are still being concatenated.
    fetching: FuturesUnordered<Concat2<hyper::Body>>,
}

impl WebCrawler {
    fn new(client: Client<HttpConnector>, uri: Uri) -> Self {
        let future = client.get(uri);
        let to_fetch: FuturesUnordered<_> = Some(future).into_iter().collect();

        Self {
            client,
            to_fetch,
            fetching: FuturesUnordered::new(),
        }
    }
}

impl Stream for WebCrawler {
    type Item = hyper::Chunk;
    type Error = hyper::Error;

    /// Advances the crawl and yields at most one fully collected body per call.
    ///
    /// The stream ends once there are neither pending requests nor bodies
    /// being collected. Any request or body error is returned as the
    /// stream's error.
    fn poll(&mut self) -> Poll<Option<Self::Item>, Self::Error> {
        // Stage 1: every response that has arrived starts collecting its body.
        // Stops as soon as the set is empty or nothing more is ready.
        while let Async::Ready(Some(response)) = self.to_fetch.poll()? {
            self.fetching.push(response.body().concat2());
        }

        // Stage 2: if a body has finished, queue a request for each URI found
        // in it and hand the body to the caller.
        if let Async::Ready(Some(body)) = self.fetching.poll()? {
            for uri in get_uris_from_body(&body) {
                let request = self.client.get(uri);
                self.to_fetch.push(request);
            }
            return Ok(Async::Ready(Some(body)));
        }

        // Nothing to yield right now: finished only when both sets are empty.
        let drained = self.to_fetch.is_empty() && self.fetching.is_empty();
        Ok(if drained {
            Async::Ready(None)
        } else {
            Async::NotReady
        })
    }
}

/// Returns the URIs found in a fetched body, which the crawler then requests.
///
/// Left as a stub in this sketch: calling it panics via `unimplemented!()`.
fn get_uris_from_body(_body: &hyper::Chunk) -> Vec<Uri> {
    unimplemented!()
}

这个实现借鉴了 BufferUnordered 的实现。您需要对它进行增强,以便传递并管理爬取的深度,但我认为这是一个不错的草图。

于 2017-11-13T18:49:20.663 回答