假设我运行一个以文本文件作为参数的 Python 脚本 (file1.py),运行方式如下:
python file1.py textfile1.txt
在 file1.py 里面有以下代码:
from pyspark import SparkContext
....
# I can read the file using the following command
sc = SparkContext()
inputfile= sc.textFile(sys.argv[1])
我必须做哪些修改才能使 file1.py 正常运行?
但是 pyspark 对我不起作用。通常我使用的是 spark-submit,在本地模式下使用 spark-submit 运行时会出现以下错误:
Traceback (most recent call last):
File "/home/noorhadoop/Desktop/folder1/file1.py", line 4, in <module>
from pyspark import SparkContext
File "<frozen importlib._bootstrap>", line 961, in _find_and_load
File "<frozen importlib._bootstrap>", line 950, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 646, in _load_unlocked
File "<frozen importlib._bootstrap>", line 616, in _load_backward_compatible
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/__init__.py", line 44, in <module>
File "<frozen importlib._bootstrap>", line 961, in _find_and_load
File "<frozen importlib._bootstrap>", line 950, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 646, in _load_unlocked
File "<frozen importlib._bootstrap>", line 616, in _load_backward_compatible
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/context.py", line 33, in <module>
File "<frozen importlib._bootstrap>", line 961, in _find_and_load
File "<frozen importlib._bootstrap>", line 950, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 646, in _load_unlocked
File "<frozen importlib._bootstrap>", line 616, in _load_backward_compatible
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/java_gateway.py", line 25, in <module>
File "/usr/lib/python3.6/platform.py", line 909, in <module>
"system node release version machine processor")
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 381, in namedtuple
TypeError: namedtuple() missing 3 required keyword-only arguments: 'verbose', 'rename', and 'module'
Error in sys.excepthook:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/apport_python_hook.py", line 63, in apport_excepthook
from apport.fileutils import likely_packaged, get_recent_crashes
File "/usr/lib/python3/dist-packages/apport/__init__.py", line 5, in <module>
from apport.report import Report
File "/usr/lib/python3/dist-packages/apport/report.py", line 21, in <module>
from urllib.request import urlopen
File "/usr/lib/python3.6/urllib/request.py", line 88, in <module>
import http.client
File "/usr/lib/python3.6/http/client.py", line 71, in <module>
import email.parser
File "/usr/lib/python3.6/email/parser.py", line 12, in <module>
from email.feedparser import FeedParser, BytesFeedParser
File "/usr/lib/python3.6/email/feedparser.py", line 27, in <module>
from email._policybase import compat32
File "/usr/lib/python3.6/email/_policybase.py", line 9, in <module>
from email.utils import _has_surrogates
File "/usr/lib/python3.6/email/utils.py", line 31, in <module>
import urllib.parse
File "/usr/lib/python3.6/urllib/parse.py", line 227, in <module>
_DefragResultBase = namedtuple('DefragResult', 'url fragment')
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 381, in namedtuple
TypeError: namedtuple() missing 3 required keyword-only arguments: 'verbose', 'rename', and 'module'
Original exception was:
Traceback (most recent call last):
File "/home/noorhadoop/Desktop/folder1/file1.py", line 4, in <module>
from pyspark import SparkContext
File "<frozen importlib._bootstrap>", line 961, in _find_and_load
File "<frozen importlib._bootstrap>", line 950, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 646, in _load_unlocked
File "<frozen importlib._bootstrap>", line 616, in _load_backward_compatible
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/__init__.py", line 44, in <module>
File "<frozen importlib._bootstrap>", line 961, in _find_and_load
File "<frozen importlib._bootstrap>", line 950, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 646, in _load_unlocked
File "<frozen importlib._bootstrap>", line 616, in _load_backward_compatible
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/context.py", line 33, in <module>
File "<frozen importlib._bootstrap>", line 961, in _find_and_load
File "<frozen importlib._bootstrap>", line 950, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 646, in _load_unlocked
File "<frozen importlib._bootstrap>", line 616, in _load_backward_compatible
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/java_gateway.py", line 25, in <module>
File "/usr/lib/python3.6/platform.py", line 909, in <module>
"system node release version machine processor")
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 381, in namedtuple
TypeError: namedtuple() missing 3 required keyword-only arguments: 'verbose', 'rename', and 'module'
hduser@noorhadoop-virtual-machine:/usr/local/spark$ ./bin/spark-submit --master local[3] /home/noorhadoop/Desktop/folder1/file1.py /home/noorhadoop/Desktop/folder1/simple1.txt
Traceback (most recent call last):
File "/home/noorhadoop/Desktop/folder1/file1.py", line 4, in <module>
from pyspark import SparkContext
File "<frozen importlib._bootstrap>", line 961, in _find_and_load
File "<frozen importlib._bootstrap>", line 950, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 646, in _load_unlocked
File "<frozen importlib._bootstrap>", line 616, in _load_backward_compatible
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/__init__.py", line 44, in <module>
File "<frozen importlib._bootstrap>", line 961, in _find_and_load
File "<frozen importlib._bootstrap>", line 950, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 646, in _load_unlocked
File "<frozen importlib._bootstrap>", line 616, in _load_backward_compatible
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/context.py", line 33, in <module>
File "<frozen importlib._bootstrap>", line 961, in _find_and_load
File "<frozen importlib._bootstrap>", line 950, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 646, in _load_unlocked
File "<frozen importlib._bootstrap>", line 616, in _load_backward_compatible
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/java_gateway.py", line 25, in <module>
File "/usr/lib/python3.6/platform.py", line 909, in <module>
"system node release version machine processor")
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 381, in namedtuple
TypeError: namedtuple() missing 3 required keyword-only arguments: 'verbose', 'rename', and 'module'
Error in sys.excepthook:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/apport_python_hook.py", line 63, in apport_excepthook
from apport.fileutils import likely_packaged, get_recent_crashes
File "/usr/lib/python3/dist-packages/apport/__init__.py", line 5, in <module>
from apport.report import Report
File "/usr/lib/python3/dist-packages/apport/report.py", line 21, in <module>
from urllib.request import urlopen
File "/usr/lib/python3.6/urllib/request.py", line 88, in <module>
import http.client
File "/usr/lib/python3.6/http/client.py", line 71, in <module>
import email.parser
File "/usr/lib/python3.6/email/parser.py", line 12, in <module>
from email.feedparser import FeedParser, BytesFeedParser
File "/usr/lib/python3.6/email/feedparser.py", line 27, in <module>
from email._policybase import compat32
File "/usr/lib/python3.6/email/_policybase.py", line 9, in <module>
from email.utils import _has_surrogates
File "/usr/lib/python3.6/email/utils.py", line 31, in <module>
import urllib.parse
File "/usr/lib/python3.6/urllib/parse.py", line 227, in <module>
_DefragResultBase = namedtuple('DefragResult', 'url fragment')
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 381, in namedtuple
TypeError: namedtuple() missing 3 required keyword-only arguments: 'verbose', 'rename', and 'module'
Original exception was:
Traceback (most recent call last):
File "/home/noorhadoop/Desktop/folder1/file1.py", line 4, in <module>
from pyspark import SparkContext
File "<frozen importlib._bootstrap>", line 961, in _find_and_load
File "<frozen importlib._bootstrap>", line 950, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 646, in _load_unlocked
File "<frozen importlib._bootstrap>", line 616, in _load_backward_compatible
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/__init__.py", line 44, in <module>
File "<frozen importlib._bootstrap>", line 961, in _find_and_load
File "<frozen importlib._bootstrap>", line 950, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 646, in _load_unlocked
File "<frozen importlib._bootstrap>", line 616, in _load_backward_compatible
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/context.py", line 33, in <module>
File "<frozen importlib._bootstrap>", line 961, in _find_and_load
File "<frozen importlib._bootstrap>", line 950, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 646, in _load_unlocked
File "<frozen importlib._bootstrap>", line 616, in _load_backward_compatible
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/java_gateway.py", line 25, in <module>
File "/usr/lib/python3.6/platform.py", line 909, in <module>
"system node release version machine processor")
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 381, in namedtuple
TypeError: namedtuple() missing 3 required keyword-only arguments: 'verbose', 'rename', and 'module'
谢谢!