我正在尝试创建 HDInsight Spark 集群,并且想在该集群中使用一个已有的 Data Lake 存储。我已生成用于连接 HDInsight 与 Data Lake 的证书,并已在门户网站中完成配置、下载了模板和证书。现在我想使用 Azure CLI 创建集群,并在需要时自动删除它。
deploy.sh(我只是在最后一条命令中添加了 --verbose 和 --debug 选项)
#!/bin/bash
# Strict mode, one option per line:
#   errexit  (-e)  exit as soon as a command returns a non-zero status
#   nounset  (-u)  treat expansion of an unset variable as an error
#   pipefail       a pipeline fails when any of its stages fails, so errors
#                  inside a pipeline are not masked
set -o errexit
set -o nounset
set -o pipefail
# Split words on newline and tab only; this is less likely to cause confusing
# bugs when looping over arrays or arguments (e.g. $@).
IFS=$'\n\t'
# Print the command-line synopsis on stderr and terminate with exit status 1.
usage() {
  printf '%s\n' "Usage: $0 -i <subscriptionId> -g <resourceGroupName> -n <deploymentName> -l <resourceGroupLocation>" >&2
  exit 1
}
# Deployment settings; each one can be supplied on the command line.
declare subscriptionId=""
declare resourceGroupName=""
declare deploymentName=""
declare resourceGroupLocation=""

# Parse the command-line options into the settings declared above.
# Arguments: the script's own arguments ("$@").
# Options:   -i subscription id, -g resource group name,
#            -n deployment name, -l resource group location.
# getopts runs in silent mode (leading ':' in the option string), so unknown
# options arrive as '?' and options missing their value arrive as ':'. Both
# are reported on stderr and end the script through usage instead of being
# silently ignored.
parse_args() {
  local arg
  while getopts ":i:g:n:l:" arg; do
    case "${arg}" in
      i)
        subscriptionId=${OPTARG}
        ;;
      g)
        resourceGroupName=${OPTARG}
        ;;
      n)
        deploymentName=${OPTARG}
        ;;
      l)
        resourceGroupLocation=${OPTARG}
        ;;
      :)
        echo "Option -${OPTARG} requires an argument" >&2
        usage
        ;;
      \?)
        echo "Unknown option: -${OPTARG}" >&2
        usage
        ;;
    esac
  done
}

# Initialize parameters specified from command line
parse_args "$@"
# OPTIND is a global shell variable, so it still reflects the parse above.
shift $((OPTIND-1))
# Prompt for any required parameter that was not given on the command line.
# Every read uses -r so that backslashes in the typed value are kept literally.
if [[ -z "$subscriptionId" ]]; then
  echo "Your subscription ID can be looked up with the CLI using: az account show --out json "
  echo "Enter your subscription ID:"
  read -r subscriptionId
  # ${var:?} aborts the script when the answer is empty.
  [[ "${subscriptionId:?}" ]]
fi
if [[ -z "$resourceGroupName" ]]; then
  echo "This script will look for an existing resource group, otherwise a new one will be created "
  echo "You can create new resource groups with the CLI using: az group create "
  echo "Enter a resource group name"
  read -r resourceGroupName
  # ${var:?} aborts the script when the answer is empty.
  [[ "${resourceGroupName:?}" ]]
fi
if [[ -z "$deploymentName" ]]; then
  echo "Enter a name for this deployment:"
  read -r deploymentName
fi
# The location is only needed when a new resource group has to be created,
# so an empty answer is accepted here.
if [[ -z "$resourceGroupLocation" ]]; then
  echo "If creating a *new* resource group, you need to set a location "
  echo "You can lookup locations with the CLI using: az account list-locations "
  echo "Enter resource group location:"
  read -r resourceGroupLocation
fi
# Template file to be used for the deployment; it must exist in the current
# directory.
templateFilePath="template.json"
[[ -f "$templateFilePath" ]] || {
  echo "$templateFilePath not found"
  exit 1
}
# Parameter file passed to the deployment; it must exist as well.
parametersFilePath="parameters.json"
[[ -f "$parametersFilePath" ]] || {
  echo "$parametersFilePath not found"
  exit 1
}
# Final check: these three values are mandatory; usage prints the synopsis
# and exits.
if [[ -z "$subscriptionId" || -z "$resourceGroupName" || -z "$deploymentName" ]]; then
  echo "Either one of subscriptionId, resourceGroupName, deploymentName is empty"
  usage
fi
# Log in to Azure when there is no active session.
# The probe is the `if` condition itself: under `set -e` a bare failing
# `az account show` would abort the script before `az login` could run.
if ! az account show 1> /dev/null; then
  az login
fi
# Set the default subscription id
az account set --subscription "$subscriptionId"
# Check for an existing resource group and create it when it is missing.
# The probe is used directly as the `if` condition, so its expected failure
# does not trip errexit and `set -e` never has to be switched off. Previously
# `set +e` was only undone on the "create" path, which left errexit disabled
# for the rest of the script whenever the group already existed.
if az group show --name "$resourceGroupName" 1> /dev/null; then
  echo "Using existing resource group..."
else
  echo "Resource group with name" "$resourceGroupName" "could not be found. Creating new resource group.."
  (
    # Trace the command being run; the subshell keeps xtrace from leaking
    # into the rest of the script.
    set -x
    az group create --name "$resourceGroupName" --location "$resourceGroupLocation" 1> /dev/null
  )
fi
# Start deployment
echo "Starting deployment..."
# The deployment runs as the `if` condition so that its outcome is handled
# explicitly whatever the errexit state is: success is reported on stdout,
# failure is reported on stderr and the script exits with the CLI's status
# instead of finishing silently with status 0.
# NOTE(review): --verbose/--debug were added for troubleshooting; debug output
# may include request details, so confirm it is acceptable with real secrets.
# NOTE(review): newer Azure CLI releases spell this command
# `az deployment group create`; confirm against the installed CLI version.
if (
  # Trace the command being run; the subshell keeps xtrace local.
  set -x
  az group deployment create --name "$deploymentName" --resource-group "$resourceGroupName" --template-file "$templateFilePath" --parameters "@${parametersFilePath}" --verbose --debug
); then
  echo "Template has been successfully deployed"
else
  deployStatus=$?
  echo "Template deployment failed (exit status ${deployStatus})" >&2
  exit "$deployStatus"
fi
template.json
{
"$schema": "http://schema.management.azure.com/schemas/2014-04-01-preview/deploymentTemplate.json#",
"contentVersion": "0.9.0.0",
"parameters": {
"clusterName": {
"type": "string",
"metadata": {
"description": "The name of the HDInsight cluster to create."
}
},
"clusterLoginUserName": {
"type": "string",
"defaultValue": "admin",
"metadata": {
"description": "These credentials can be used to submit jobs to the cluster and to log into cluster dashboards."
}
},
"clusterLoginPassword": {
"type": "securestring",
"metadata": {
"description": "The password must be at least 10 characters in length and must contain at least one digit, one non-alphanumeric character, and one upper or lower case letter."
}
},
"location": {
"type": "string",
"defaultValue": "westeurope",
"metadata": {
"description": "The location where all azure resources will be deployed."
}
},
"clusterVersion": {
"type": "string",
"defaultValue": "3.6",
"metadata": {
"description": "HDInsight cluster version."
}
},
"clusterWorkerNodeCount": {
"type": "int",
"defaultValue": 4,
"metadata": {
"description": "The number of nodes in the HDInsight cluster."
}
},
"clusterKind": {
"type": "string",
"defaultValue": "SPARK",
"metadata": {
"description": "The type of the HDInsight cluster to create."
}
},
"sshUserName": {
"type": "string",
"defaultValue": "sshuser",
"metadata": {
"description": "These credentials can be used to remotely access the cluster."
}
},
"sshPassword": {
"type": "securestring",
"metadata": {
"description": "The password must be at least 10 characters in length and must contain at least one digit, one non-alphanumeric character, and one upper or lower case letter."
}
},
"identityCertificate": {
"type": "securestring"
},
"identityCertificatePassword": {
"type": "securestring"
}
},
"resources": [
{
"apiVersion": "2015-03-01-preview",
"name": "[parameters('clusterName')]",
"type": "Microsoft.HDInsight/clusters",
"location": "[parameters('location')]",
"dependsOn": [],
"properties": {
"clusterVersion": "[parameters('clusterVersion')]",
"osType": "Linux",
"tier": "standard",
"clusterDefinition": {
"kind": "[parameters('clusterKind')]",
"componentVersion": {
"Spark": "2.3"
},
"configurations": {
"gateway": {
"restAuthCredential.isEnabled": true,
"restAuthCredential.username": "[parameters('clusterLoginUserName')]",
"restAuthCredential.password": "[parameters('clusterLoginPassword')]"
},
"core-site": {
"fs.defaultFS": "adl://home",
"dfs.adls.home.hostname": "vivienda.azuredatalakestore.net",
"dfs.adls.home.mountpoint": "/clusters/vivienda/"
},
"clusterIdentity": {
"clusterIdentity.applicationId": "5e6237dc-897d-4a94-9913-e25a987d00bc",
"clusterIdentity.certificate": "[parameters('identityCertificate')]",
"clusterIdentity.aadTenantId": "https://login.windows.net/c1c86fed-0aa0-465e-92be-5b97e2b584f9",
"clusterIdentity.resourceUri": "https://datalake.azure.net/",
"clusterIdentity.certificatePassword": "[parameters('identityCertificatePassword')]"
}
}
},
"storageProfile": {
"storageaccounts": []
},
"computeProfile": {
"roles": [
{
"autoScale": null,
"name": "headnode",
"minInstanceCount": 1,
"targetInstanceCount": 2,
"hardwareProfile": {
"vmSize": "Standard_D12_V2"
},
"osProfile": {
"linuxOperatingSystemProfile": {
"username": "[parameters('sshUserName')]",
"password": "[parameters('sshPassword')]"
}
},
"virtualNetworkProfile": null,
"scriptActions": []
},
{
"autoScale": null,
"name": "workernode",
"targetInstanceCount": "[parameters('clusterWorkerNodeCount')]",
"hardwareProfile": {
"vmSize": "Standard_D13_V2"
},
"osProfile": {
"linuxOperatingSystemProfile": {
"username": "[parameters('sshUserName')]",
"password": "[parameters('sshPassword')]"
}
},
"virtualNetworkProfile": null,
"scriptActions": []
}
]
}
}
}
]
}
parameters.json(我认为我的问题来自这个文件)
{
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"clusterName": {
"value": "cname"
},
"clusterLoginUserName": {
"value": "admin"
},
"clusterLoginPassword": {
"value": null
},
"location": {
"value": "westeurope"
},
"clusterWorkerNodeCount": {
"value": 4
},
"clusterKind": {
"value": "SPARK"
},
"clusterVersion": {
"value": "3.6"
},
"sshUserName": {
"value": "sshuser"
},
"sshPassword": {
"value": null
},
"identityCertificate": {
"value": null
},
"identityCertificatePassword": {
"value": null
}
}
}
在 parameters.json 中需要填写“clusterLoginPassword”、“sshPassword”、“identityCertificate”和“identityCertificatePassword”。我尝试用不同的方式填写这些字段,但每次都遇到错误。
- 3 个密码字段,字符串值为“XXXXX”,identityCertificate 为证书文件“cert-download.pfx”的路径。
结果:
"code": "BadRequest",
"message": "User input validation failed. Errors: The request payload is invalid. The input is not a valid Base-64 string as it contains a non-base 64 character, more than two padding characters, or an illegal character among the padding characters. "
- 将密码转换为 base 64
结果:
"code": "BadRequest",
"message": "User input validation failed. Errors: The request payload is invalid. The input is not a valid Base-64 string as it contains a non-base 64 character, more than two padding characters, or an illegal character among the padding characters. "
- 由于模板文件中这些字段的类型都是 securestring,我使用以下 PowerShell 代码获取了证书内容的 Base64 编码:
PS /tmp/azure> $certPassword = "XXXXXXXX"
PS /tmp/azure> $certFilePath = "cert-download.pfx"
PS /tmp/azure> $certPasswordSecureString = ConvertTo-SecureString $certPassword -AsPlainText -Force
PS /tmp/azure> $certificatePFX = New-Object System.Security.Cryptography.X509Certificates.X509Certificate2($certFilePath, $certPasswordSecureString)
PS /tmp/azure> $credential = [System.Convert]::ToBase64String($certificatePFX.GetRawCertData())
PS /tmp/azure> $credential
MIICnjCCAYagAwIBAgIQTOVQViiwq4NKlPOYRGjZ8TANBgkqhkiG9w0BAQUFADALMQkwBwYDVQQDEwAwHhcNMTgxMDIzMjIwMDAwWhcNMTkxMDIzMjIwMDAwWjALMQkwBwYDVQQDEwAwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCV2J/jquMaey+iEzjheH/J860T3J3B3Cc6kW7nBmSeGd436JD6RMxoeYGC66kq15VMFh6sscU8a0I3QLJtQaOPRk03bLftSilfVZelblVf0530GYqddrWGRH2JVPX850/25mXuPEtbFkURVFrBDS7YEyQsIQUdJpnCwqvRjMzrwKm3WfwWrA02gs8xbjr9JokHdZE1wXj7nYAVuI+fsNAQFCtC1veXw+3ofxd8dEY0hSWb8209ug0j67OuVPhFqEaCbit+pCqq4shoYY/14Bf4jZhkm8ssED78oDQGqLRlknqDuD8m4gV8Ln9UTEfgdTU2eKKTQsOFMSY6dV8H26gdAgMBAAEwDQYJKoZIhvcNAQEFBQADggEBAG+IEqkQbuHpbS1viqs8IyqIuVTDron+ZCp0rPv/4i17KaEyO/40zMOies8XCwJRDKKcFeLEmVXyCPVVY9Fn3UyRtCA9T7eams1reBX/hBy+vUeeYPKCBNpsAk9akLFdxq8zYIPw9CNFV4imuM/UFYrF+dmYeMeYENvWGpYG0LtSWfm6T9z+T3+iOhVQ1r7ZE10ZHB4IFGIlYd7HThNDicQHbtZMXus8iQllTcPJmU8heYuuHoi5we5LAwFnrvqgGdcGKwMY+9OLll7bna+9ZzojMea+TkHCNN+KY07M3YxgI6sGE7CmUNotCmGChFCpie+7D8rqPSql8zV+FnnK6ps=
我把结果放在参数文件中。
结果:
"code": "BadRequest",
"message": "DeploymentDocument 'AmbariConfiguration_1_7' failed the validation. Error: 'Service Principal Details are invalid - The private key is not present in the X.509 certificate.',DeploymentDocument 'AmbariConfiguration_1_7' failed the validation. Error: 'Error while getting access to the datalake storage account vivienda: The private key is not present in the X.509 certificate..'"
我也了解过 Key Vault,但还没有弄清楚如何在 parameters.json 文件中使用它。
所以我需要帮助以正确的方式填写 parameters.json :)