1

我正在尝试使用 cm_api 9.0.0 从 Cloudera Manager 5.3.2 中删除主机

我试过了

api.get_cloudera_manager().hosts_decommission([host])
api.get_cluster("cluster").remove_host(host)

但 remove_host() 出错

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib/python2.6/site-packages/cm_api/endpoints/clusters.py", line 218, in remove_host
return self._delete("hosts/" + hostId, ApiHostRef, api_version=3)
  File "/usr/lib/python2.6/site-packages/cm_api/endpoints/types.py", line 352, in _delete
api_version)
  File "/usr/lib/python2.6/site-packages/cm_api/endpoints/types.py", line 380, in _call
api_version)
  File "/usr/lib/python2.6/site-packages/cm_api/endpoints/types.py", line 139, in call
ret = method(path, params=params)
  File "/usr/lib/python2.6/site-packages/cm_api/resource.py", line 124, in delete
return self.invoke("DELETE", relpath, params)
  File "/usr/lib/python2.6/site-packages/cm_api/resource.py", line 63, in invoke
headers=headers)
  File "/usr/lib/python2.6/site-packages/cm_api/http_client.py", line 161, in execute
raise self._exc_class(ex)
cm_api.api_client.ApiException: ip-10-0-8-187.ec2.internal still has roles assigned to it. (error 400)

从集群中删除主机的正确顺序是什么,或者 cm_api 中是否有一个命令可以执行此操作?

谢谢

4

1 回答 1

2

这个 python 脚本有助于从集群中删除主机。以下是步骤:

  1. 停止并停用(decommission)主机上的所有角色
  2. 从主机中删除角色——逐一识别并删除每个角色
  3. 从集群中移除主机
  4. 从 Cloudera Manager 中删除主机

此脚本用于从在 AWS 中运行、由 Cloudera Manager 托管的集群中删除主机。它的用途是在不再有资源需求时,对集群中的工作节点(NodeManager 角色)和网关(Gateway)角色进行缩容。

您可以根据您的环境相应地更改脚本。

#!/bin/python
import httplib2
import os
import requests
import json
import boto3
import time
from requests.auth import HTTPBasicAuth

# Placeholder AWS credentials/region consumed by boto3 through the environment.
# Replace the values before running.
# NOTE(review): these assignments overwrite anything already set in the
# environment -- confirm that is what you want on hosts with real credentials.
os.environ["AWS_ACCESS_KEY_ID"] = "ACCESS_KEY"
os.environ["AWS_SECRET_ACCESS_KEY"] = "SECRET_ACCESS_KEY"
os.environ["AWS_DEFAULT_REGION"] = "us-east-1"

region = 'us-east-1'

# Identify this machine through the EC2 instance metadata endpoint. A timeout
# keeps the script from hanging when the endpoint is unreachable, and
# raise_for_status() stops an HTTP error body from being used as an id.
metadata_base = 'http://169.254.169.254/latest/meta-data/'
metadata_timeout = 5  # seconds
metadata = requests.get(url=metadata_base + 'instance-id', timeout=metadata_timeout)
metadata.raise_for_status()
instance_id = metadata.text
host = requests.get(url=metadata_base + 'hostname', timeout=metadata_timeout)
host.raise_for_status()
# The hostname is what the rest of the script sends to Cloudera Manager.
# NOTE(review): assumes the CM host identifier equals this hostname -- confirm.
host_id = host.text

# Cloudera Manager connection settings (placeholders -- adjust per environment).
username = 'admin'
password = 'admin'
cluster_name = 'cluster001'
scm_protocol = 'http'
scm_host = 'host.compute-1.amazonaws.com'
scm_port = '7180'
scm_api = 'v17'

# A single Auto Scaling client, pinned to the configured region. (The original
# built a second, unused autoscaling client under the misleading name `ec2`.)
client = boto3.client('autoscaling', region_name=region)

response = client.describe_auto_scaling_instances(InstanceIds=[instance_id])
instances = response['AutoScalingInstances']
if not instances:
    # Without this guard the [0] lookup below dies with a bare IndexError.
    raise SystemExit("instance " + instance_id + " is not part of an Auto Scaling group")
state = instances[0]['LifecycleState']
print("vm is in " + state)
# Runs only while the Auto Scaling lifecycle state is 'Terminating:Wait'.
# Order matters: decommission -> delete roles -> remove the host from the
# cluster -> delete the host from Cloudera Manager -> refresh the cluster and
# redeploy client configuration.
if state == 'Terminating:Wait':
    print("host decommision started")

    # Pieces shared by every Cloudera Manager API call below.
    api_base = scm_protocol + '://' + scm_host + ':' + scm_port + '/api/' + scm_api
    cluster_base = api_base + '/clusters/' + cluster_name
    auth = HTTPBasicAuth(username, password)

    ##decommission host
    service_url = api_base + '/cm/commands/hostsDecommission'
    print(service_url)
    headers = {'content-type': 'application/json'}
    req_body = {"items": [host_id]}
    print(req_body)
    req = requests.post(url=service_url, auth=auth, data=json.dumps(req_body), headers=headers)
    print(req.text)
    # Fixed pause after submitting the decommission request; the command
    # result is not polled.
    time.sleep(120)

    ##delete roles in a host
    host_url = api_base + '/hosts/' + host_id
    req = requests.get(host_url, auth=auth)
    host_info = json.loads(req.content)

    # Delete each role listed for the host, one at a time. A missing
    # 'roleRefs' key is treated the same as an empty list.
    for role in host_info.get('roleRefs', []):
        role_url = cluster_base + '/services/' + role['serviceName'] + '/roles/' + role['roleName']
        print(role_url)
        req = requests.delete(role_url, auth=auth)
        print(req.text)
        time.sleep(10)

    ##remove host from cluster
    service_url = cluster_base + '/hosts/' + host_id
    print(service_url)
    req = requests.delete(service_url, auth=auth)
    # Print the response like every other step so a failure here is visible.
    print(req.text)
    time.sleep(10)

    ##remove host from cloudera manager
    # The local agent is stopped before the host record is deleted.
    # NOTE(review): presumably so the agent cannot re-register the host -- confirm.
    os.system("/etc/init.d/cloudera-scm-agent stop")
    service_url = api_base + '/hosts/' + host_id
    print(service_url)
    req = requests.delete(service_url, auth=auth)
    print(req.text)
    time.sleep(10)

    ##refresh cluster configuration
    # Cluster commands are scoped to a cluster; the original URL omitted the
    # cluster name ('/clusters/commands/refresh').
    service_url = cluster_base + '/commands/refresh'
    print(service_url)
    req = requests.post(service_url, auth=auth)
    print(req.text)
    time.sleep(10)

    ##deploy client configuration
    # Same fix as above: include the cluster name in the command path.
    service_url = cluster_base + '/commands/deployClientConfig'
    print(service_url)
    req = requests.post(service_url, auth=auth)
    print(req.text)
    time.sleep(10)
于 2018-02-15T17:30:23.777 回答