在下面 foomodule.py 中的 foo 类里,我在 run_with_multiprocessing 方法中遇到了错误。
该方法将 self._data 中的记录按数量拆分成若干块,并使用数据的子集调用 some_func();
例如当 limit = 800 时,第一次迭代会调用 some_func(data[0:800], 800)。
我之所以这样做,是因为在 run_with_multiprocessing 的一个功能相同、只是不使用多进程的变体中,分 10 次各处理 1k 条记录与一次处理 10k 条记录相比,性能有了巨大的提升。
现在我想使用 multiprocessing,看看是否可以进一步提高性能。
我在 Windows 8.1 上运行 python 3.8.2。我对 python 和多处理相当陌生。非常感谢你的帮助。
# foomodule.py
import multiprocessing
class foo:
    """Process a record set in chunks, running each chunk in a child process."""

    def __init__(self, data, record_count):
        """Store the records and how many of them should be processed.

        Args:
            data: Sliceable collection of records.
            record_count: Number of records in ``data`` to process.
        """
        self._data = data
        self._record_count = record_count

    def some_func(self, data, record_count):
        """Do the per-chunk work on ``data``, which holds ``record_count`` records.

        The actual work is elided in this snippet; only the placeholder
        comment below is present.
        """
        # looping through self._data and doing some work

    def run_with_multiprocessing(self, limit):
        """Call ``some_func`` on consecutive chunks of at most ``limit`` records.

        Each chunk is handed to its own ``multiprocessing.Process``.

        Args:
            limit: Maximum number of records per chunk; must be positive.

        Raises:
            ValueError: If ``limit`` is not positive, because the chunk
                offset would never advance and the loop would not terminate.
        """
        if limit <= 0:
            raise ValueError("limit must be a positive number of records")

        # NOTE: the target is a bound method, so on spawn-based platforms
        # (e.g. Windows) the whole instance is pickled for the child. Every
        # attribute of ``self`` and every record in the chunk must therefore
        # be picklable (open sockets / DB connections are not).
        step = 0
        while step < self._record_count:
            # Clamp the last chunk so it never reads past record_count.
            stop = min(step + limit, self._record_count)
            proc = multiprocessing.Process(
                target=self.some_func,
                args=(self._data[step:stop], stop - step),
            )
            proc.start()
            # NOTE(review): joining right after start() runs the chunks one
            # at a time; kept as in the original to avoid spawning an
            # unbounded number of processes at once.
            proc.join()
            step = stop
        return
在 script.py 中使用该类时,出现以下错误:
import time

import foomodule

# The guard is required when processes are started on spawn-based platforms
# (e.g. Windows): each child re-imports this module, and unguarded top-level
# code would run again in every child.
# NOTE(review): the code that loads ``data`` is not shown here; it should
# presumably live under this guard as well — confirm against the full script.
if __name__ == "__main__":
    # data is a mysql result set with, say, 10'000 rows
    start = time.time()
    bar = foomodule.foo(data, 10000)
    limit = 800
    bar.run_with_multiprocessing(limit)
    end = time.time()
    print("finished after " + str(round(end - start, 2)) + "s")
Traceback (most recent call last):
File "C:/coding/python/project/script.py", line 29, in <module>
bar.run_with_multiprocessing(limit)
File "C:\coding\python\project\foomodule.py", line 303, in run_with_multiprocessing
proc.start()
File "C:\...\Python\Python38-32\lib\multiprocessing\process.py", line 121, in start
self._popen = self._Popen(self)
File "C:\...\Python\Python38-32\lib\multiprocessing\context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\...\Python\Python38-32\lib\multiprocessing\context.py", line 326, in _Popen
return Popen(process_obj)
File "C:\...\Python\Python38-32\lib\multiprocessing\popen_spawn_win32.py", line 93, in __init__
reduction.dump(process_obj, to_child)
File "C:\...\Python\Python38-32\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
File "C:\...\Python\Python38-32\lib\socket.py", line 272, in __getstate__
raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")
TypeError: cannot pickle 'SSLSocket' object