我正在尝试运行 python 多处理库来加速 csv 文件的编码。但是我遇到了这个错误:
RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
我确实创造了
Search = SemanticSearch(model_path, data_path, query)
if __name__ == '__main__':
query, flat, top_results = Search.search()
这指向了我课堂上的功能,
def setup(self):
with open(self.data_path, newline='') as f: # read and sort data
reader = csv.reader(f)
data1 = list(reader)
self.corpus = [x for sublist in data1 for x in sublist] # turn into 1D list
#SemanticSearch.encode(self)
self.texts_encodings = self.map(self.encode, self.corpus)
end = time.time()
print(end - self.start)
def encode(self):
self.start = time.time()
return self.model.encode(self.corpus, convert_to_tensor=True, show_progress_bar=True)
在我的初始化函数中,我确实调用并设置了
self.map = Pool().map
有什么我缺少的提示吗?提前致谢
编辑
class SemanticSearch(object):
def __init__(self, model, data, query):
self.query = query
self.model = SentenceTransformer(model) ### Model location
self.data_path = data ###path to csv
self.corpus = None
self.texts_encodings = None
self.start = None
self.map = Pool().map
def setup(self):
print('here')
with open(self.data_path, newline='') as f: # read and sort data
reader = csv.reader(f)
data1 = list(reader)
self.corpus = [x for sublist in data1 for x in sublist] # turn into 1D list
# SemanticSearch.encode(self)
self.texts_encodings = self.map(self.encode, self.corpus)
# SemanticSearch.encode(self)
end = time.time()
print(end - self.start)
def encode(self):
self.start = time.time()
return self.model.encode(self.corpus, convert_to_tensor=True,
show_progress_bar=True) ##encode to invisible layer
def search(self):
SemanticSearch.setup(self)
if __name__ == "__main__":
model_path = r'data\BERT_MODELS\fine-tuned\multi-qa-MiniLM-L6-cos-v1'
data_path = 'data/raw_data/Jira-2_14_2022.csv'
query = 'query'
Search = SemanticSearch(model_path, data_path, query)
query, flat, top_results = Search.search()