0

我正在尝试运行 python 多处理库来加速 csv 文件的编码。但是我遇到了这个错误:

RuntimeError: 
        An attempt has been made to start a new process before the
        current process has finished its bootstrapping phase.

        This probably means that you are not using fork to start your
        child processes and you have forgotten to use the proper idiom
        in the main module:

            if __name__ == '__main__':
                freeze_support()
                ...

        The "freeze_support()" line can be omitted if the program
        is not going to be frozen to produce an executable.

我确实创造了

Search = SemanticSearch(model_path, data_path, query)
if __name__ == '__main__':

    query, flat, top_results = Search.search()

这指向了我课堂上的功能,

def setup(self):
        with open(self.data_path, newline='') as f:  # read and sort data
            reader = csv.reader(f)
            data1 = list(reader)
        self.corpus = [x for sublist in data1 for x in sublist]  # turn into 1D list
        #SemanticSearch.encode(self)
        self.texts_encodings = self.map(self.encode, self.corpus)
        end = time.time()
        print(end - self.start)


    def encode(self):
        self.start = time.time()
        return self.model.encode(self.corpus, convert_to_tensor=True, show_progress_bar=True)

在我的初始化函数中,我确实调用并设置了

self.map = Pool().map

有什么我缺少的提示吗?提前致谢

编辑

class SemanticSearch(object):
   def __init__(self, model, data, query):
       self.query = query
       self.model = SentenceTransformer(model)  ### Model location
       self.data_path = data  ###path to csv 
       self.corpus = None
       self.texts_encodings = None
       self.start = None
       self.map = Pool().map

   def setup(self):
       print('here')
       with open(self.data_path, newline='') as f:  # read and sort data
           reader = csv.reader(f)
           data1 = list(reader)
       self.corpus = [x for sublist in data1 for x in sublist]  # turn into 1D list
       # SemanticSearch.encode(self)
       self.texts_encodings = self.map(self.encode, self.corpus)
       # SemanticSearch.encode(self)
       end = time.time()
       print(end - self.start)

   def encode(self):
       self.start = time.time()
       return self.model.encode(self.corpus, convert_to_tensor=True,
                                show_progress_bar=True)  ##encode to invisible layer

   def search(self):
       SemanticSearch.setup(self)


if __name__ == "__main__":
   model_path = r'data\BERT_MODELS\fine-tuned\multi-qa-MiniLM-L6-cos-v1'
   data_path = 'data/raw_data/Jira-2_14_2022.csv'
   query = 'query'

   Search = SemanticSearch(model_path, data_path, query)

   query, flat, top_results = Search.search()

4

0 回答 0