0

在 Flair NER 上使用 Dask 运行时出现 pickling(序列化)错误。不创建分布式客户端(Client)时代码可以正常运行,但执行时间似乎并没有明显缩短。

我的代码是:

from dask.distributed import Client, progress
import dask
import time
import random
import pandas as pd
import numpy as np
import time
import dask.bag as db
import dask.dataframe as ddf
from flair.data import Sentence
from flair.models import SequenceTagger
from sklearn.datasets import fetch_20newsgroups

# Load the "train" subset of the 20 Newsgroups dataset. The __main__ block
# reads data["data"] (texts) and data["target"] (labels) from this object.
# NOTE(review): this runs at import time, so any process that re-imports this
# module repeats the load — confirm whether that is intended.
data = fetch_20newsgroups(subset="train")


def getFlairEntities(text):
    """Run NER over ``text`` and describe every entity span that was found.

    ``text`` is wrapped in a flair ``Sentence`` and handed to the module-level
    ``tagger`` (defined outside this section); the ``'ner'`` spans are then
    read back from that same sentence object.

    Returns:
        A list with one dict per span, in the order ``get_spans('ner')``
        yields them. Each dict has the keys ``start_pos``, ``end_pos``,
        ``entity``, ``entity_type`` and ``score`` (rounded to 4 decimals).
    """
    # NOTE(review): `tagger` is resolved from module globals. If it is a model
    # object created in the script's __main__ module, the task serializer may
    # try to ship it together with this function — TODO confirm and consider
    # creating the tagger on the worker instead.
    doc = Sentence(text)
    tagger.predict(doc)  # the return value is unused; spans are read from `doc`
    return [
        {
            'start_pos': span.start_pos,
            'end_pos': span.end_pos,
            'entity': span.text,
            'entity_type': span.tag,
            'score': round(span.score, 4),
        }
        for span in doc.get_spans('ner')
    ]

def ner_flair(df):
    """Return a copy of ``df`` with an ``entities`` column added.

    Args:
        df: A pandas DataFrame (one partition when used with
            ``map_partitions``) that has a ``text`` column.

    Returns:
        A new DataFrame with the same rows plus an ``entities`` column that
        holds the result of ``getFlairEntities`` for each ``text`` value. An
        existing ``entities`` column is replaced.
    """
    # Use assign() rather than item assignment so the partition passed in is
    # left untouched; the caller only consumes the returned frame.
    return df.assign(entities=df.text.map(getFlairEntities))

if __name__ == "__main__":
    # Build the input frame in one step: one row per document, with its label.
    df = pd.DataFrame({"text": data["data"], "target": data["target"]})

    # Creating the Client starts a local, process-based cluster and makes it
    # the scheduler used by compute(). The context manager guarantees the
    # client (and the cluster it started) is closed even if tagging fails.
    with Client(processes=True):
        # Only the first 20 documents are processed, one per partition.
        dask_dataframe = ddf.from_pandas(df.head(20), npartitions=20)

        # perf_counter() is monotonic, so the elapsed time cannot be skewed by
        # system clock adjustments.
        t0 = time.perf_counter()
        result = dask_dataframe.map_partitions(ner_flair)
        dask_entities = result.compute()
        t1 = time.perf_counter()

    print(f"Time to process with Dask {t1 - t0}")

PicklingError: Can't pickle <functools._lru_cache_wrapper object at 0x00000136957653A0>: it's not the same object as flair.embeddings.token.WordEmbeddings.get_cached_vec

4

0 回答 0