我正在使用带有 IPython 插件的 StarCluster。当我从 IPython Notebook 以负载均衡模式运行 KMeans 聚类时,始终只有 Master 节点的 CPU 使用率达到 100%,其他 EC2 实例从不承担负载。
我尝试过使用大型数据集和 20 个节点,结果相同:所有负载都在 Master 节点上。我还尝试过对 node001 使用直接视图(direct view),但即便如此,所有负载仍然由 Master 节点承担。
我是否有什么配置错误?是否需要在配置中设置 DISABLE_QUEUE(禁用队列)?如何才能把负载分配到所有实例上?
模板文件
# StarCluster cluster template named "iptemplate", followed by the plugin
# sections it can reference.
[cluster iptemplate]
# Key name is masked in the original post.
KEYNAME = ********
# NOTE(review): the accompanying text mentions running with 20 nodes, but this
# template sets 2 - confirm which size was actually launched.
CLUSTER_SIZE = 2
CLUSTER_USER = ipuser
CLUSTER_SHELL = bash
REGION = us-west-2
NODE_IMAGE_ID = ami-04bedf34
NODE_INSTANCE_TYPE = m3.medium
# Left commented out, so DISABLE_QUEUE is not set by this template.
#DISABLE_QUEUE = True
# Only these two plugin sections are listed for this template; the other
# [plugin ...] sections below are defined but not referenced here.
PLUGINS = pypackages,ipcluster
# IPython cluster plugin with the notebook enabled (password masked in the post).
[plugin ipcluster]
SETUP_CLASS = starcluster.plugins.ipcluster.IPCluster
ENABLE_NOTEBOOK = True
NOTEBOOK_PASSWD = *****
# Defined but not listed in PLUGINS above.
[plugin ipclusterstop]
SETUP_CLASS = starcluster.plugins.ipcluster.IPClusterStop
# Defined but not listed in PLUGINS above.
[plugin ipclusterrestart]
SETUP_CLASS = starcluster.plugins.ipcluster.IPClusterRestartEngines
# Python packages handed to the PyPkgInstaller plugin (comma-separated list).
[plugin pypackages]
setup_class = starcluster.plugins.pypkginstaller.PyPkgInstaller
packages = scikit-learn, psutil, scikit-image, numpy, pyzmq
# NOTE(review): "ubuntu.PackageInstaller" is not under the starcluster.plugins
# namespace used by the other sections - presumably a user-supplied plugin
# module; confirm it exists. This section is not listed in PLUGINS above.
[plugin opencvinstaller]
setup_class = ubuntu.PackageInstaller
pkg_to_install = cmake
# Defined but not listed in PLUGINS above.
[plugin pkginstaller]
SETUP_CLASS = starcluster.plugins.pkginstaller.PackageInstaller
# list of apt-get installable packages
PACKAGES = python-mysqldb
代码
from IPython import parallel
# Connect to the IPython parallel cluster using the default client settings.
clients = parallel.Client()
# Despite its name, `rc` is a load-balanced *view* obtained from the client,
# not the client itself.
rc = clients.load_balanced_view()
def clustering(X_digits, n_clusters=20):
    """Fit KMeans on ``X_digits`` and return the fitted cluster centers.

    Args:
        X_digits: Training data, passed unchanged to ``KMeans.fit``
            (assumed to be array-like of shape (n_samples, n_features) -
            TODO confirm against the caller).
        n_clusters: Number of clusters to fit. Defaults to 20, the value
            that was previously hard-coded, so existing one-argument calls
            behave as before.

    Returns:
        The ``cluster_centers_`` attribute of the fitted estimator.
    """
    # Imported inside the function so the import is resolved wherever the
    # function body actually executes (it is handed to a parallel view
    # elsewhere in this file).
    from sklearn.cluster import KMeans

    kmeans = KMeans(n_clusters=n_clusters)
    return kmeans.fit(X_digits).cluster_centers_
# Presumably makes calls on this view synchronous, so apply() below waits for
# the result - confirm against the IPython parallel view documentation.
rc.block = True
# NOTE(review): this submits `clustering` exactly once with the whole dataset.
# A single apply() on a load-balanced view is presumably scheduled as one task
# on one engine, which would explain why only one machine is busy; spreading
# load would need several independent tasks - confirm against the docs.
# X_digits is not defined in the visible code; it must come from elsewhere.
rc.apply(clustering, X_digits)