0

我正在尝试使用以下代码,从一个可以正常访问的 HDFS 位置读取文件:

import hdfs3
from hdfs3 import HDFileSystem
hdfs=HDFileSystem(host='host',port='port')
with hdfs.open('FILE') as f:
    model_AOB = f.read()

我收到以下错误:

---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
<ipython-input-1-d44f943ebe4e> in <module>()
      1 import hdfs3
      2 from hdfs3 import HDFileSystem
----> 3 hdfs=HDFileSystem(host='HOST',port=PORT)
      4 with hdfs.open('FILE') as f:
      5     model_AOB = f.read()

~\AppData\Local\Continuum\anaconda3\lib\site-packages\hdfs3\core.py in __init__(self, host, port, connect, autoconf, pars, **kwargs)
     86 
     87         if connect:
---> 88             self.connect()
     89 
     90     def __getstate__(self):

~\AppData\Local\Continuum\anaconda3\lib\site-packages\hdfs3\core.py in connect(self)
    104         This happens automatically at startup
    105         """
--> 106         get_lib()
    107         conf = self.conf.copy()
    108         if self._handle:

~\AppData\Local\Continuum\anaconda3\lib\site-packages\hdfs3\core.py in get_lib()
    668     global _lib
    669     if _lib is None:
--> 670         from .lib import _lib as l
    671         _lib = l
    672 

~\AppData\Local\Continuum\anaconda3\lib\site-packages\hdfs3\lib.py in <module>()
     15 for name in ['libhdfs3.so', 'libhdfs3.dylib']:
     16     try:
---> 17         _lib = ct.cdll.LoadLibrary(name)
     18         break
     19     except OSError as e:

~\AppData\Local\Continuum\anaconda3\lib\ctypes\__init__.py in LoadLibrary(self, name)
    432 
    433     def LoadLibrary(self, name):
--> 434         return self._dlltype(name)
    435 
    436 cdll = LibraryLoader(CDLL)

~\AppData\Local\Continuum\anaconda3\lib\ctypes\__init__.py in __init__(self, name, mode, handle, use_errno, use_last_error)
    354 
    355         if handle is None:
--> 356             self._handle = _dlopen(self._name, mode)
    357         else:
    358             self._handle = handle

OSError: [WinError 126] The specified module could not be found

我还尝试在调用 HDFileSystem 时添加参数 pars = {"hadoop.security.authentication": "kerberos"},因为我认为该 hadoop 集群启用了 Kerberos 认证(kerberized)。

有人可以帮忙解决这个问题吗?很抱歉这个问题写得比较冗长,我是 Python 新手,所以不想不小心遗漏错误信息中的相关内容。

谢谢

4

0 回答 0