1

我需要一些帮助来了解登录 Databricks 的各种方式。我正在使用 Terraform 预配 Azure Databricks,想知道下面两段代码的区别。当我使用选项 1 时,会收到如下所示的错误。

选项1:

  required_providers {
    azuread     = "~> 1.0"
    azurerm     = "~> 2.0"
    azuredevops = { source = "registry.terraform.io/microsoft/azuredevops", version = "~> 0.0" }
    databricks  = { source = "registry.terraform.io/databrickslabs/databricks", version = "~> 0.0" }
  }
}

# Random provider with an empty (default) configuration.
provider "random" {}
# Azure AD provider; tenant ID, client ID and client secret are all taken from
# input variables rather than written inline.
provider "azuread" {
  tenant_id     = var.project.arm.tenant.id
  client_id     = var.project.arm.client.id
  client_secret = var.secret.arm.client.secret
}

# Databricks provider pointed at the URL of the workspace declared as
# azurerm_databricks_workspace.db-workspace.
provider "databricks" {
  host          = azurerm_databricks_workspace.db-workspace.workspace_url
  # Managed-identity (MSI) authentication. As the answers below explain, this is
  # meant for machines that have a managed identity assigned, and that identity
  # must also have access to the workspace.
  azure_use_msi = true
}

# Premium-SKU Azure Databricks workspace. Name, resource group and location are
# read from module outputs.
resource "azurerm_databricks_workspace" "db-workspace" {
  name                          = module.names-db-workspace.environment.databricks_workspace.name_unique
  resource_group_name           = module.resourcegroup.resource_group.name
  location                      = module.resourcegroup.resource_group.location
  sku                           = "premium"
  public_network_access_enabled = true

  # Network settings: the workspace uses the public and private subnets (and
  # their NSG associations) exposed by the "centralus" virtual network module.
  custom_parameters {
    no_public_ip                                         = true
    virtual_network_id                                   = module.virtualnetwork["centralus"].virtual_network.self.id
    public_subnet_name                                   = module.virtualnetwork["centralus"].virtual_network.subnets["db-sub-1-public"].name
    private_subnet_name                                  = module.virtualnetwork["centralus"].virtual_network.subnets["db-sub-2-private"].name
    public_subnet_network_security_group_association_id  = module.virtualnetwork["centralus"].virtual_network.nsgs.associations.subnets["databricks-public-nsg-db-sub-1-public"].id
    private_subnet_network_security_group_association_id = module.virtualnetwork["centralus"].virtual_network.nsgs.associations.subnets["databricks-private-nsg-db-sub-2-private"].id
  }
  tags = local.tags
}

Databricks 集群创建

# Autoscaling cluster (1 to 7 workers). The cluster name is built from the
# project name and environment; Spark version and node type come from input
# variables.
resource "databricks_cluster" "dbcselfservice" {
  cluster_name            = format("adb-cluster-%s-%s", var.project.name, var.project.environment.name)
  spark_version           = var.spark_version
  node_type_id            = var.node_type_id
  autotermination_minutes = 20
  autoscale {
    min_workers = 1
    max_workers = 7
  }
  # Azure-specific settings requesting "SPOT_AZURE" availability.
  azure_attributes {
    availability       = "SPOT_AZURE"
    first_on_demand    = 1
    spot_bid_max_price = 100
  }
  # Explicit ordering: the workspace must exist before this cluster is created.
  depends_on = [
    azurerm_databricks_workspace.db-workspace
  ]
}

Databricks 工作区 RBAC 权限

# Workspace group named "adb-users-<project name>" whose members may create
# clusters and instance pools.
resource "databricks_group" "db-group" {
  display_name               = format("adb-users-%s", var.project.name)
  allow_cluster_create       = true
  allow_instance_pool_create = true
  # Explicit ordering: the workspace must exist before the group is created.
  depends_on = [
    resource.azurerm_databricks_workspace.db-workspace
  ]
}

# One workspace user per entry in local.display_name.
# NOTE(review): local.user_name is indexed with the same count.index, so it is
# presumably a parallel list of the same length; confirm where the locals are defined.
resource "databricks_user" "dbuser" {
  count            = length(local.display_name)
  display_name     = local.display_name[count.index]
  user_name        = local.user_name[count.index]
  workspace_access = true
  # Explicit ordering: the workspace must exist before users are created.
  depends_on = [
    resource.azurerm_databricks_workspace.db-workspace
  ]
}

将成员添加到 Databricks 管理员组

# Adds one membership in the "admins" group per address in local.email_address.
resource "databricks_group_member" "i-am-admin" {
  for_each  = toset(local.email_address)
  group_id  = data.databricks_group.admins.id
  # index() turns the address back into its list position so the matching
  # count-indexed databricks_user.dbuser instance can be selected.
  # NOTE(review): assumes local.email_address is ordered like the lists used by
  # databricks_user.dbuser; confirm.
  member_id = databricks_user.dbuser[index(local.email_address, each.key)].id
  depends_on = [
    resource.azurerm_databricks_workspace.db-workspace
  ]
}

# Looks up the group whose display name is "admins". The lookup is deferred
# until the workspace exists.
data "databricks_group" "admins" {
  display_name = "admins"
  depends_on = [
    # Dependency on the cluster is currently disabled:
    #    resource.databricks_cluster.dbcselfservice,
    resource.azurerm_databricks_workspace.db-workspace
  ]
}

我在应用 TF 时得到的错误如下:

Error: User not authorized

with databricks_user.dbuser[1],
on resources.adb.tf line 80, in resource "databricks_user" "dbuser":
80: resource "databricks_user" "dbuser"{


Error: User not authorized

with databricks_user.dbuser[0],
on resources.adb.tf line 80, in resource "databricks_user" "dbuser":
80: resource "databricks_user" "dbuser"{

Error: cannot refresh AAD token: adal:Refresh request failed. Status Code =  '500'. Response body: {"error":"server_error", "error_description":"Internal server error"} Endpoint http://169.254.169.254/metadata/identity/oauth2/token?api-version=2018-02-01&resource=https%3A%2F%2Fmanagement.core.windows.net%2F

with databricks_group.db-group,
on resources.adb.tf line 80, in resource "databricks_group" "db-group":
71: resource "databricks_group" "db-group"{

错误是因为下面的这个块吗?

# Databricks provider configured with the workspace URL and managed-identity
# (MSI) authentication. This is the block the question asks about.
provider "databricks" {
  host          = azurerm_databricks_workspace.db-workspace.workspace_url
  azure_use_msi = true
}

当我点击门户中的 URL 时,我只希望能够自动登录。那我该怎么做呢?另外,为什么我们需要声明两次 databricks 提供程序:一次在 required_providers 下,一次在 provider "databricks" 块中?我发现如果不提供第二处(即 provider 块),就会收到如下错误消息:

"authentication is not configured for provider"
4

2 回答 2

1

如评论中所述,如果您使用 Azure CLI 身份验证(即使用您的用户名和密码执行 az login),则可以使用以下代码:

# Pins the Databricks provider from the databrickslabs namespace to release 0.3.11.
terraform {
  required_providers {
    databricks = {
      version = "0.3.11"
      source  = "databrickslabs/databricks"
    }
  }
}
# azurerm with an empty features block and no inline credentials.
provider "azurerm" {
  features {}
}

# Only the workspace URL is configured here; no credentials appear in this block.
provider "databricks" {
  host = azurerm_databricks_workspace.example.workspace_url
}

# Premium-SKU Azure Databricks workspace placed in an existing resource group
# and wired to the public/private subnets of an existing virtual network.
resource "azurerm_databricks_workspace" "example" {
  name                          = "DBW-ansuman"
  sku                           = "premium"
  location                      = azurerm_resource_group.example.location
  resource_group_name           = azurerm_resource_group.example.name
  managed_resource_group_name   = "ansuman-DBW-managed-without-lb"
  public_network_access_enabled = true

  tags = {
    Pricing     = "Standard"
    Environment = "Production"
  }

  custom_parameters {
    virtual_network_id  = azurerm_virtual_network.example.id
    public_subnet_name  = azurerm_subnet.public.name
    private_subnet_name = azurerm_subnet.private.name
    no_public_ip        = true

    # NSG associations for the two subnets above.
    public_subnet_network_security_group_association_id  = azurerm_subnet_network_security_group_association.public.id
    private_subnet_network_security_group_association_id = azurerm_subnet_network_security_group_association.private.id
  }
}
# Node type lookup restricted to types with a local disk; read only after the
# workspace exists.
data "databricks_node_type" "smallest" {
  depends_on = [azurerm_databricks_workspace.example]

  local_disk = true
}

# Spark version lookup restricted to long-term-support releases; read only
# after the workspace exists.
data "databricks_spark_version" "latest_lts" {
  depends_on = [azurerm_databricks_workspace.example]

  long_term_support = true
}
# "Shared Autoscaling" cluster (1 to 7 workers) whose node type and Spark
# version come from the two databricks data sources.
resource "databricks_cluster" "dbcselfservice" {
  depends_on = [azurerm_databricks_workspace.example]

  cluster_name            = "Shared Autoscaling"
  node_type_id            = data.databricks_node_type.smallest.id
  spark_version           = data.databricks_spark_version.latest_lts.id
  autotermination_minutes = 20

  azure_attributes {
    spot_bid_max_price = 100
    first_on_demand    = 1
    availability       = "SPOT_AZURE"
  }

  autoscale {
    max_workers = 7
    min_workers = 1
  }
}
# Group whose members may create clusters and instance pools.
resource "databricks_group" "db-group" {
  depends_on = [azurerm_databricks_workspace.example]

  display_name               = "adb-users-admin"
  allow_instance_pool_create = true
  allow_cluster_create       = true
}

# A single workspace user.
resource "databricks_user" "dbuser" {
  depends_on = [azurerm_databricks_workspace.example]

  user_name        = "example@contoso.com"
  display_name     = "Rahul Sharma"
  workspace_access = true
}

# Puts the user above into the group above.
resource "databricks_group_member" "i-am-admin" {
  depends_on = [azurerm_databricks_workspace.example]

  member_id = databricks_user.dbuser.id
  group_id  = databricks_group.db-group.id
}

输出:

在此处输入图像描述


如果您使用服务主体(Service Principal)进行身份验证,则可以使用以下代码:

# Pins the Databricks provider from the databrickslabs namespace to release 0.3.11.
terraform {
  required_providers {
    databricks = {
      version = "0.3.11"
      source  = "databrickslabs/databricks"
    }
  }
}
# Service principal client secret shared by both providers below. It is declared
# as a sensitive input variable so the secret is not stored in the configuration;
# supply it at run time (for example through the TF_VAR_azure_client_secret
# environment variable).
variable "azure_client_secret" {
  description = "Client secret of the service principal used by the azurerm and databricks providers."
  type        = string
  sensitive   = true
}

# azurerm authenticated as a service principal.
provider "azurerm" {
  subscription_id = "948d4068-xxxx-xxxx-xxxx-e00a844e059b"
  tenant_id       = "72f988bf-xxxx-xxxx-xxxx-2d7cd011db47"
  client_id       = "f6a2f33d-xxxx-xxxx-xxxx-d713a1bb37c0"
  client_secret   = var.azure_client_secret
  features {}
}

# Databricks provider authenticated with the same service principal against the
# workspace URL.
provider "databricks" {
  host                = azurerm_databricks_workspace.example.workspace_url
  azure_client_id     = "f6a2f33d-xxxx-xxxx-xxxx-d713a1bb37c0"
  azure_client_secret = var.azure_client_secret
  azure_tenant_id     = "72f988bf-xxxx-xxxx-xxxx-2d7cd011db47"
}


# Premium-SKU Azure Databricks workspace placed in an existing resource group
# and wired to the public/private subnets of an existing virtual network.
resource "azurerm_databricks_workspace" "example" {
  name                          = "DBW-ansuman"
  sku                           = "premium"
  location                      = azurerm_resource_group.example.location
  resource_group_name           = azurerm_resource_group.example.name
  managed_resource_group_name   = "ansuman-DBW-managed-without-lb"
  public_network_access_enabled = true

  tags = {
    Pricing     = "Standard"
    Environment = "Production"
  }

  custom_parameters {
    virtual_network_id  = azurerm_virtual_network.example.id
    public_subnet_name  = azurerm_subnet.public.name
    private_subnet_name = azurerm_subnet.private.name
    no_public_ip        = true

    # NSG associations for the two subnets above.
    public_subnet_network_security_group_association_id  = azurerm_subnet_network_security_group_association.public.id
    private_subnet_network_security_group_association_id = azurerm_subnet_network_security_group_association.private.id
  }
}
# Node type lookup restricted to types with a local disk; read only after the
# workspace exists.
data "databricks_node_type" "smallest" {
  depends_on = [azurerm_databricks_workspace.example]

  local_disk = true
}

# Spark version lookup restricted to long-term-support releases; read only
# after the workspace exists.
data "databricks_spark_version" "latest_lts" {
  depends_on = [azurerm_databricks_workspace.example]

  long_term_support = true
}
# "Shared Autoscaling" cluster (1 to 7 workers) whose node type and Spark
# version come from the two databricks data sources.
resource "databricks_cluster" "dbcselfservice" {
  depends_on = [azurerm_databricks_workspace.example]

  cluster_name            = "Shared Autoscaling"
  node_type_id            = data.databricks_node_type.smallest.id
  spark_version           = data.databricks_spark_version.latest_lts.id
  autotermination_minutes = 20

  azure_attributes {
    spot_bid_max_price = 100
    first_on_demand    = 1
    availability       = "SPOT_AZURE"
  }

  autoscale {
    max_workers = 7
    min_workers = 1
  }
}
# Group whose members may create clusters and instance pools.
resource "databricks_group" "db-group" {
  depends_on = [azurerm_databricks_workspace.example]

  display_name               = "adb-users-admin"
  allow_instance_pool_create = true
  allow_cluster_create       = true
}

# A single workspace user.
resource "databricks_user" "dbuser" {
  depends_on = [azurerm_databricks_workspace.example]

  user_name        = "example@contoso.com"
  display_name     = "Rahul Sharma"
  workspace_access = true
}

# Puts the user above into the group above.
resource "databricks_group_member" "i-am-admin" {
  depends_on = [azurerm_databricks_workspace.example]

  member_id = databricks_user.dbuser.id
  group_id  = databricks_group.db-group.id
}

为什么我们需要声明两次 databricks 提供程序:一次在 required_providers 下,一次在 provider "databricks" 块中?

required_providers 用于从源(即 Terraform Registry)下载并初始化所需的提供程序;而 provider 块则用于进一步配置已下载的提供程序,例如指定 client_id、features 块等,可用于身份验证或其他配置。

于 2021-11-16T07:39:13.583 回答
0

azure_use_msi选项主要用于 CI/CD 管道,这些管道在分配有托管标识的计算机上执行。文档中描述了所有可能的身份验证选项,但最简单的方法是通过 Azure CLI 使用身份验证,因此您只需将host参数保留在提供程序块中。如果该计算机上没有 Azure CLI,则可以改用主机 + 个人访问令牌的组合。

如果您在分配了托管标识的计算机上运行该代码,则需要确保已将该标识添加到工作区中,或者该标识对工作区具有贡献者(Contributor)访问权限。有关更多详细信息,请参阅 Azure Databricks 文档。

于 2021-11-16T07:32:44.563 回答