2

我一直在为 istio 苦苦挣扎……所以在这里寻求专家的帮助!

背景

我正在尝试部署带有 Dex 的 Kubeflow 应用,以实现多租户。清单(manifest)文件来自 GitHub,参考的是 Kubeflow 官方文档。

这是组件/版本信息的列表

  • 我在 GKE 上运行 kubernetes 1.15
  • Kubeflow 中使用 Istio 1.1.6 作为服务网格
  • 尝试为 ML 部署 kubeflow 1.0
  • 为 authn 部署 dex 1.0

使用清单文件,我成功地在集群上部署了 kubeflow。这就是我所做的。

  • 在集群上部署 kubeflow 应用
  • 部署带 OIDC 服务的 Dex,通过 Google OAuth 2.0 启用身份验证(authn)
  • 启用 RBAC
  • 创建 EnvoyFilter,将登录用户附加到请求头“kubeflow-userid”中

下面是对第 3 步和第 4 步的验证检查:RBAC 已启用,并且已添加用于 kubeflow-userid 的 EnvoyFilter。

[root@gke-client-tf leilichao]# k get clusterrbacconfigs -o yaml
apiVersion: v1
items:
- apiVersion: rbac.istio.io/v1alpha1
  kind: ClusterRbacConfig
  metadata:
    annotations:
      kubectl.kubernetes.io/last-applied-configuration: |
        {"apiVersion":"rbac.istio.io/v1alpha1","kind":"ClusterRbacConfig","metadata":{"annotations":{},"name":"default"},"spec":{"mode":"ON"}}
    creationTimestamp: "2020-07-04T01:28:52Z"
    generation: 2
    name: default
    resourceVersion: "5986075"
    selfLink: /apis/rbac.istio.io/v1alpha1/clusterrbacconfigs/default
    uid: db70920e-f364-40ec-a93b-a3364f88650f
  spec:
    mode: "ON"
kind: List
metadata:
  resourceVersion: ""
  selfLink: ""
[root@gke-client-tf leilichao]# k get envoyfilter -n istio-system -o yaml
apiVersion: v1
items:
- apiVersion: networking.istio.io/v1alpha3
  kind: EnvoyFilter
  metadata:
    annotations:
      kubectl.kubernetes.io/last-applied-configuration: |
        {"apiVersion":"networking.istio.io/v1alpha3","kind":"EnvoyFilter","metadata":{"annotations":{},"labels":{"app.kubernetes.io/component":"oidc-authservice","app.kubernetes.io/instance":"oidc-authservice-v1.0.0","app.kubernetes.io/managed-by":"kfctl","app.kubernetes.io/name":"oidc-authservice","app.kubernetes.io/part-of":"kubeflow","app.kubernetes.io/version":"v1.0.0"},"name":"authn-filter","namespace":"istio-system"},"spec":{"filters":[{"filterConfig":{"httpService":{"authorizationRequest":{"allowedHeaders":{"patterns":[{"exact":"cookie"},{"exact":"X-Auth-Token"}]}},"authorizationResponse":{"allowedUpstreamHeaders":{"patterns":[{"exact":"kubeflow-userid"}]}},"serverUri":{"cluster":"outbound|8080||authservice.istio-system.svc.cluster.local","failureModeAllow":false,"timeout":"10s","uri":"http://authservice.istio-system.svc.cluster.local"}},"statusOnError":{"code":"GatewayTimeout"}},"filterName":"envoy.ext_authz","filterType":"HTTP","insertPosition":{"index":"FIRST"},"listenerMatch":{"listenerType":"GATEWAY"}}],"workloadLabels":{"istio":"ingressgateway"}}}
    creationTimestamp: "2020-07-04T01:40:43Z"
    generation: 1
    labels:
      app.kubernetes.io/component: oidc-authservice
      app.kubernetes.io/instance: oidc-authservice-v1.0.0
      app.kubernetes.io/managed-by: kfctl
      app.kubernetes.io/name: oidc-authservice
      app.kubernetes.io/part-of: kubeflow
      app.kubernetes.io/version: v1.0.0
    name: authn-filter
    namespace: istio-system
    resourceVersion: "4715289"
    selfLink: /apis/networking.istio.io/v1alpha3/namespaces/istio-system/envoyfilters/authn-filter
    uid: e599ba82-315a-4fc1-9a5d-e8e35d93ca26
  spec:
    filters:
    - filterConfig:
        httpService:
          authorizationRequest:
            allowedHeaders:
              patterns:
              - exact: cookie
              - exact: X-Auth-Token
          authorizationResponse:
            allowedUpstreamHeaders:
              patterns:
              - exact: kubeflow-userid
          serverUri:
            cluster: outbound|8080||authservice.istio-system.svc.cluster.local
            failureModeAllow: false
            timeout: 10s
            uri: http://authservice.istio-system.svc.cluster.local
        statusOnError:
          code: GatewayTimeout
      filterName: envoy.ext_authz
      filterType: HTTP
      insertPosition:
        index: FIRST
      listenerMatch:
        listenerType: GATEWAY
    workloadLabels:
      istio: ingressgateway
kind: List
metadata:
  resourceVersion: ""
  selfLink: ""

RBAC 问题分析

在我完成部署后。我进行了以下功能测试:

  • 我可以通过 google oauth 使用我的 google 帐户登录
  • 我能够创建自己的个人资料/命名空间
  • 我能够创建一个笔记本服务器
  • 但是我无法连接到笔记本服务器

RBAC 问题调查

在 Kubeflow 上成功创建笔记本服务器并尝试连接它之后,我收到了“RBAC: access denied”错误。我设法调高了 Envoy 的日志级别,并得到了下面的日志。

[2020-08-06 13:32:43.290][26][debug][rbac] [external/envoy/source/extensions/filters/http/rbac/rbac_filter.cc:64] checking request: remoteAddress: 10.1.1.2:58012, localAddress: 10.1.2.66:8888, ssl: none, headers: ':authority', 'compliance-kf-system.ml'
':path', '/notebook/roger-l-c-lei/aug06/'
':method', 'GET'
'user-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
'accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9'
'accept-encoding', 'gzip, deflate'
'accept-language', 'en,zh-CN;q=0.9,zh;q=0.8'
'cookie', 'authservice_session=MTU5NjY5Njk0MXxOd3dBTkZvMldsVllVMUZPU0VaR01sSk5RVlJJV2xkRFVrRTFTVUl5V0RKV1EwdEhTMU5QVjFCVlUwTkpSVFpYUlVoT1RGVlBUa0U9fN3lPBXDDSZMT9MTJRbG8jv7AtblKTE3r84ayeCYuKOk; _xsrf=2|1e6639f2|10d3ea0a904e0ae505fd6425888453f8|1596697030'
'referer', 'http://compliance-kf-system.ml/jupyter/'
'upgrade-insecure-requests', '1'
'x-forwarded-for', '10.10.10.230'
'x-forwarded-proto', 'http'
'x-request-id', 'babbf884-4cec-93fd-aea6-2fc60d3abb83'
'kubeflow-userid', 'roger.l.c.lei@XXXX.com'
'x-istio-attributes', 'CjAKHWRlc3RpbmF0aW9uLnNlcnZpY2UubmFtZXNwYWNlEg8SDXJvZ2VyLWwtYy1sZWkKIwoYZGVzdGluYXRpb24uc2VydmljZS5uYW1lEgcSBWF1ZzA2Ck4KCnNvdXJjZS51aWQSQBI+a3ViZXJuZXRlczovL2lzdGlvLWluZ3Jlc3NnYXRld2F5LTg5Y2Q0YmQ0Yy1kdnF3dC5pc3Rpby1zeXN0ZW0KQQoXZGVzdGluYXRpb24uc2VydmljZS51aWQSJhIkaXN0aW86Ly9yb2dlci1sLWMtbGVpL3NlcnZpY2VzL2F1ZzA2CkMKGGRlc3RpbmF0aW9uLnNlcnZpY2UuaG9zdBInEiVhdWcwNi5yb2dlci1sLWMtbGVpLnN2Yy5jbHVzdGVyLmxvY2Fs'
'x-envoy-expected-rq-timeout-ms', '300000'
'x-b3-traceid', '3bf35cca1f7b75e7a42a046b1c124b1f'
'x-b3-spanid', 'a42a046b1c124b1f'
'x-b3-sampled', '1'
'x-envoy-original-path', '/notebook/roger-l-c-lei/aug06/'
'content-length', '0'
'x-envoy-internal', 'true'
, dynamicMetadata: filter_metadata {
  key: "istio_authn"
  value {
  }
}

[2020-08-06 13:32:43.290][26][debug][rbac] [external/envoy/source/extensions/filters/http/rbac/rbac_filter.cc:108] enforced denied

从源代码看来,是 allowed 函数返回了 false,因此才给出了“RBAC: access denied”响应。

  if (engine.has_value()) {
    if (engine->allowed(*callbacks_->connection(), headers,
                        callbacks_->streamInfo().dynamicMetadata(), nullptr)) {
      ENVOY_LOG(debug, "enforced allowed");
      config_->stats().allowed_.inc();
      return Http::FilterHeadersStatus::Continue;
    } else {
      ENVOY_LOG(debug, "enforced denied");
      callbacks_->sendLocalReply(Http::Code::Forbidden, "RBAC: access denied", nullptr,
                                 absl::nullopt);
      config_->stats().denied_.inc();
      return Http::FilterHeadersStatus::StopIteration;
    }
  }

我在 Envoy 的配置转储中进行了搜索,看起来规则应该允许任何请求头的值为我的邮箱地址的请求。从上面的日志可以确认,我的请求头中确实带有该值。

{
 "name": "envoy.filters.http.rbac",
 "config": {
  "rules": {
   "policies": {
    "ns-access-istio": {
     "permissions": [
      {
       "and_rules": {
        "rules": [
         {
          "any": true
         }
        ]
       }
      }
     ],
     "principals": [
      {
       "and_ids": {
        "ids": [
         {
          "header": {
           "exact_match": "roger.l.c.lei@XXXX.com"
          }
         }
        ]
       }
      }
     ]
    }
   }
  }
 }
}

据我了解,用于验证 RBAC 授权(authz)的 Envoy 配置就来自上面这段配置,它由 Mixer 分发到 sidecar。日志和代码把我引向了 rbac.istio.io 下的 ServiceRoleBinding 配置。

[root@gke-client-tf leilichao]# k get servicerolebinding -n roger-l-c-lei -o yaml
apiVersion: v1
items:
- apiVersion: rbac.istio.io/v1alpha1
  kind: ServiceRoleBinding
  metadata:
    annotations:
      role: admin
      user: roger.l.c.lei@XXXX.com
    creationTimestamp: "2020-07-04T01:35:30Z"
    generation: 5
    name: owner-binding-istio
    namespace: roger-l-c-lei
    ownerReferences:
    - apiVersion: kubeflow.org/v1
      blockOwnerDeletion: true
      controller: true
      kind: Profile
      name: roger-l-c-lei
      uid: 689c9f04-08a6-4c51-a1dc-944db1a66114
    resourceVersion: "23201026"
    selfLink: /apis/rbac.istio.io/v1alpha1/namespaces/roger-l-c-lei/servicerolebindings/owner-binding-istio
    uid: bbbffc28-689c-4099-837a-87a2feb5948f
  spec:
    roleRef:
      kind: ServiceRole
      name: ns-access-istio
    subjects:
    - properties:
        request.headers[]: roger.l.c.lei@XXXX.com
  status: {}
kind: List
metadata:
  resourceVersion: ""
  selfLink: ""

我想尝试更新此 ServiceRoleBinding 以验证一些假设,因为我无法调试 Envoy 源代码,并且没有足够的日志来显示 allowed 方法返回 false 的确切原因。

但是我发现自己无法更新服务角色绑定。每次我完成编辑后,它都会恢复到原来的版本。

我发现有一个名为 istio-galley 的 ValidatingWebhookConfiguration(见下面的代码块),它会监控这些 Istio RBAC 资源。

[root@gke-client-tf leilichao]# k get validatingwebhookconfigurations istio-galley -oyaml
apiVersion: admissionregistration.k8s.io/v1beta1
kind: ValidatingWebhookConfiguration
metadata:
  creationTimestamp: "2020-08-04T15:00:59Z"
  generation: 1
  labels:
    app: galley
    chart: galley
    heritage: Tiller
    istio: galley
    release: istio
  name: istio-galley
  ownerReferences:
  - apiVersion: extensions/v1beta1
    blockOwnerDeletion: true
    controller: true
    kind: Deployment
    name: istio-galley
    uid: 11fef012-4145-49ac-a43c-2e1d0a460ea4
  resourceVersion: "22484680"
  selfLink: /apis/admissionregistration.k8s.io/v1beta1/validatingwebhookconfigurations/istio-galley
  uid: 6f485e28-3b5a-4a3b-b31f-a5c477c82619
webhooks:
- admissionReviewVersions:
  - v1beta1
  clientConfig:
    caBundle: 
    .
    .
    .
    service:
      name: istio-galley
      namespace: istio-system
      path: /admitpilot
      port: 443
  failurePolicy: Fail
  matchPolicy: Exact
  name: pilot.validation.istio.io
  namespaceSelector: {}
  objectSelector: {}
  rules:
  - apiGroups:
    - config.istio.io
    apiVersions:
    - v1alpha2
    operations:
    - CREATE
    - UPDATE
    resources:
    - httpapispecs
    - httpapispecbindings
    - quotaspecs
    - quotaspecbindings
    scope: '*'
  - apiGroups:
    - rbac.istio.io
    apiVersions:
    - '*'
    operations:
    - CREATE
    - UPDATE
    resources:
    - '*'
    scope: '*'
  - apiGroups:
    - authentication.istio.io
    apiVersions:
    - '*'
    operations:
    - CREATE
    - UPDATE
    resources:
    - '*'
    scope: '*'
  - apiGroups:
    - networking.istio.io
    apiVersions:
    - '*'
    operations:
    - CREATE
    - UPDATE
    resources:
    - destinationrules
    - envoyfilters
    - gateways
    - serviceentries
    - sidecars
    - virtualservices
    scope: '*'
  sideEffects: Unknown
  timeoutSeconds: 30
- admissionReviewVersions:
  - v1beta1
  clientConfig:
    caBundle: 
    .
    .
    .
    service:
      name: istio-galley
      namespace: istio-system
      path: /admitmixer
      port: 443
  failurePolicy: Fail
  matchPolicy: Exact
  name: mixer.validation.istio.io
  namespaceSelector: {}
  objectSelector: {}
  rules:
  - apiGroups:
    - config.istio.io
    apiVersions:
    - v1alpha2
    operations:
    - CREATE
    - UPDATE
    resources:
    - rules
    - attributemanifests
    - circonuses
    - deniers
    - fluentds
    - kubernetesenvs
    - listcheckers
    - memquotas
    - noops
    - opas
    - prometheuses
    - rbacs
    - solarwindses
    - stackdrivers
    - cloudwatches
    - dogstatsds
    - statsds
    - stdios
    - apikeys
    - authorizations
    - checknothings
    - listentries
    - logentries
    - metrics
    - quotas
    - reportnothings
    - tracespans
    scope: '*'
  sideEffects: Unknown
  timeoutSeconds: 30

长话短说

两个多星期以来,我一直在努力解决这个 istio 问题。我敢肯定有很多人在尝试在 k8s 上解决 istio 问题时也有同样的感受。欢迎任何建议!以下是我对问题的理解,如果我错了,请纠正我:

  • 日志证据显示 rbac 规则不允许我访问资源
  • 我需要更新 rbac 规则
  • 规则由mixer根据ServiceRoleBinding分配给envoy容器
  • 所以我需要更新 ServiceRoleBinding
  • 我无法更新 ServiceRoleBinding,因为验证准入 webhook 或 Istio Mixer 阻止我这样做

我遇到了以下问题

即使删除了验证 webhook,我也无法更新 ServiceRoleBinding

我试图删除这个验证 webhook 以更新 servicerolebinding。保存编辑后资源立即恢复。验证 webhook 实际上是从 configmap 自动生成的,所以我必须更新它来更新 webhook。

Galley 中是否有某种缓存,供 Mixer 用来分发配置?

我找不到任何相关日志表明 rbac.istio.io 资源受到 istio-system 命名空间中的任何服务的保护/验证。

如何获取 MIXER 的日志

我需要了解哪个组件确切地控制了策略。我设法更新了日志级别,但没有找到任何有用的东西

最重要的是:如何调试 Envoy 容器

我需要调试 Envoy 程序,以了解为什么 allowed 函数返回 false。如果无法轻松调试它,是否有文档可以指导我修改代码以添加更多日志,并构建新镜像推送到 GCR,这样我就可以再运行一次,根据日志查看幕后发生了什么?

4

3 回答 3

1

回答我自己的问题,因为我已经取得了一些进展。

即使删除了验证 webhook,我也无法更新 ServiceRoleBinding

这是因为 ServiceRoleBinding 实际上是由 kubeflow 命名空间中的 Profile 控制器(profile controller)生成、监控和管理的,而不是由验证 webhook 管理的。

我之所以遇到这个 RBAC 问题,是因为根据 profile 清单文件夹中的 params.yaml,规则被生成为

request.headers[]: roger.l.c.lei@XXXX.com

而不是

request.headers[kubeflow-userid]: roger.l.c.lei@XXXX.com

这是因为我在 params.yaml 中把该值配置成了空白,而不是 userid-header=kubeflow-userid。

于 2020-08-15T10:00:20.773 回答
0

检查应用程序命名空间中的 AuthorizationPolicy 资源。

于 2021-07-30T02:19:09.317 回答
0

对于新集群,请参阅问题 4440 中的此评论

https://github.com/kubeflow/pipelines/issues/4440

cat << EOF | kubectl apply -f -
apiVersion: security.istio.io/v1beta1
kind: AuthorizationPolicy
metadata:
 name: bind-ml-pipeline-nb-kubeflow-user-example-com
 namespace: kubeflow
spec:
 selector:
   matchLabels:
     app: ml-pipeline
 rules:
 - from:
   - source:
       principals: ["cluster.local/ns/kubeflow-user-example-com/sa/default-editor"]
---
apiVersion: networking.istio.io/v1alpha3
kind: EnvoyFilter
metadata:
  name: add-header
  namespace: kubeflow-user-example-com
spec:
  configPatches:
  - applyTo: VIRTUAL_HOST
    match:
      context: SIDECAR_OUTBOUND
      routeConfiguration:
        vhost:
          name: ml-pipeline.kubeflow.svc.cluster.local:8888
          route:
            name: default
    patch:
      operation: MERGE
      value:
        request_headers_to_add:
        - append: true
          header:
            key: kubeflow-userid
            value: user@example.com
  workloadSelector:
    labels:
      notebook-name: test2
EOF

在我的 notebook 中运行

import kfp
client = kfp.Client()
print(client.list_experiments())

输出

{'experiments': [{'created_at': datetime.datetime(2021, 8, 12, 9, 14, 20, tzinfo=tzlocal()),
                  'description': None,
                  'id': 'b2e552e5-3324-483a-8ec8-b32894f49281',
                  'name': 'test',
                  'resource_references': [{'key': {'id': 'kubeflow-user-example-com',
                                                   'type': 'NAMESPACE'},
                                           'name': None,
                                           'relationship': 'OWNER'}],
                  'storage_state': 'STORAGESTATE_AVAILABLE'}],
 'next_page_token': None,
 'total_size': 1}

于 2021-08-12T14:25:58.260 回答