0

我的 gRPC 客户端每秒发送一个 keepalive 数据包,代码如下:

bool FcrConfigClient::SendHealthCheckRpc(const grpc::string& service_name)
{
    CtlHealthCheckRequest request;
    unsigned int client_connection_timeout = HEALTH_CHECK_TIMER_OUT;
    CtlHealthCheckResponse response;
    ClientContext context;
    CtlFcrStatus *fcr_status;
    std::chrono::system_clock::time_point deadline = std::chrono::system_clock::now() + std::chrono::seconds(client_connection_timeout);
    context.set_deadline(deadline);

    request.set_service(service_name);
    request.set_requestoffline(requestOffline);
    fcr_status = request.mutable_fcr_status();
    fcr_status->set_prober_status(global_grpc_shm_config->proberResult);
    fcr_status->set_offline_flag(global_grpc_shm_config->offline);

    Status s = stub_->check(&context, request, &response);
    ...... 

当我在 Linux 中配置 iptables 丢弃所有 gRPC 数据包时,gRPC 通道被阻塞。调用堆栈如下:

(gdb) bt
#0  0x00007ffff5e8f873 in epoll_wait () from /lib64/libc.so.6
#1  0x00007ffff69164c8 in pollable_epoll (deadline=<optimized out>, p=0x7fffe4001890)
    at src/core/lib/iomgr/ev_epollex_linux.cc:823
#2  pollset_work (pollset=0x808570, worker_hdl=<optimized out>, deadline=<optimized out>)
    at src/core/lib/iomgr/ev_epollex_linux.cc:1010
#3  0x00007ffff693a651 in cq_pluck (cq=0x808470, tag=0x7fffffffd990, deadline=..., reserved=<optimized out>)
    at src/core/lib/surface/completion_queue.cc:1172
#4  0x00007ffff693a9db in grpc_completion_queue_pluck (cq=<optimized out>, tag=<optimized out>, deadline=..., 
    reserved=<optimized out>) at src/core/lib/surface/completion_queue.cc:1199
#5  0x00007ffff750a9d8 in grpc::CoreCodegen::grpc_completion_queue_pluck (this=<optimized out>, 
    cq=<optimized out>, tag=<optimized out>, deadline=..., reserved=<optimized out>)
    at src/cpp/common/core_codegen.cc:70
#6  0x0000000000477662 in grpc::CompletionQueue::Pluck (this=0x7fffffffd950, tag=0x7fffffffd990)
    at /usr/local/include/grpcpp/impl/codegen/completion_queue.h:297
#7  0x00000000004f83c2 in grpc::internal::BlockingUnaryCallImpl<fcrctlservice::CtlHealthCheckRequest, fcrctlservice::CtlHealthCheckResponse>::BlockingUnaryCallImpl (this=0x7fffffffdb00, channel=0x7fffe00103c0, method=..., 
    context=0x7fffffffdbd0, request=..., result=0x7fffffffdd70)
    at /usr/local/include/grpcpp/impl/codegen/client_unary_call.h:72
#8  0x00000000004f5bd9 in grpc::internal::BlockingUnaryCall<fcrctlservice::CtlHealthCheckRequest, fcrctlservice::CtlHealthCheckResponse> (channel=0x7fffe00103c0, method=..., context=0x7fffffffdbd0, request=..., 
    result=0x7fffffffdd70) at /usr/local/include/grpcpp/impl/codegen/client_unary_call.h:43
#9  0x00000000004f3eca in fcrctlservice::FcrCtlService::Stub::check (this=0x7fffe0006450, context=0x7fffffffdbd0, 
    request=..., response=0x7fffffffdd70) at fcr_config_client.grpc.pb.cc:73
#10 0x00000000004bdd17 in FcrConfigClient::SendHealthCheckRpc (this=0x7fffe0006290, service_name="16.2.6.138")
    at fcr_config_grpc.cc:380
#11 0x00000000004cc315 in fcr_grpc_health_check_timer (t=0x7fffffffdff0) at fcr_config_grpc.cc:3045
#12 0x00007ffff7274295 in thread_call (thread=0x7fffffffdff0) at lib/thread.c:1542
#13 0x00000000004cfb0b in RunGrpcServer () at fcr_config_grpc.cc:4205
#14 0x00000000004d037b in main (argc=1, argv=0x7fffffffe3d0) at fcr_config_grpc.cc:4378
(gdb)

我在源代码中搜索 grpc::CompletionQueue::Pluck 函数,它的定义如下:

  // Waits on this completion queue for the event associated with `tag`, then
  // finalizes the tag. Returns true when the core event reports success.
  bool Pluck(internal::CompletionQueueTag* tag) {
    // The wait deadline is fixed to gpr_inf_future here; no caller-supplied
    // deadline is passed into this function.
    auto deadline =
        g_core_codegen_interface->gpr_inf_future(GPR_CLOCK_REALTIME);
    auto ev = g_core_codegen_interface->grpc_completion_queue_pluck(
        cq_, tag, deadline, nullptr);
    bool ok = ev.success != 0;
    void* ignored = tag;
    // FinalizeResult receives pointers to `ignored` and `ok` and may modify
    // them; the asserts require it to return true and to leave the tag
    // pointer unchanged.
    GPR_CODEGEN_ASSERT(tag->FinalizeResult(&ignored, &ok));
    GPR_CODEGEN_ASSERT(ignored == tag);
    // Ignore mutations by FinalizeResult: Pluck returns the C API status
    return ev.success != 0;
  }

我在代码中设置的截止时间(deadline)并没有在 Pluck 函数中使用。所以我的问题是:为什么 gRPC 客户端会被阻塞?是 deadline 没有生效吗?

4

1 回答 1

0

您遇到的可能是与 https://github.com/grpc/grpc/issues/15889 相同的问题。

请确保 KEEPALIVE_TIMEOUT 设置 (GRPC_ARG_KEEPALIVE_TIMEOUT_MS) 配置正确。

于 2020-05-28T23:17:20.437 回答