0

在此处输入图像描述

请看下面脚本的运行结果：写入 CSV 时，每个单元格的内容都带有方括号和引号（例如 ['...']），我想把它们去掉：

# Shared TreeTagger wrapper created with TAGLANG='fr'; lemmatize() reads this
# module-level instance instead of building a new tagger on every call.
tagger = treetaggerwrapper.TreeTagger(TAGLANG='fr') 
def lemmatize(text):
    """Lemmatize *text* with the module-level ``tagger``.

    The text is tagged with ``tagger.tag_text``, the raw output is parsed by
    ``treetaggerwrapper.make_tags`` and the lemma of every ``Tag`` entry is
    kept. When a lemma lists several alternatives separated by ``|`` only the
    first alternative is used. Entries that are not ``Tag`` instances are
    skipped.

    Args:
        text: The text to lemmatize. ``None`` and NaN (missing cells when the
            function is applied to a DataFrame column) are treated as empty
            text instead of being passed to the tagger.

    Returns:
        A one-element list whose single item is the lemmas joined by single
        spaces. The list shape is kept for existing callers; use
        ``" ".join(lemmatize(text))`` to obtain a plain string.
    """
    import math

    # Missing values cannot be tagged; return the same shape an empty text
    # would produce rather than failing inside the tagger.
    if text is None or (isinstance(text, float) and math.isnan(text)):
        return [""]

    parsed = treetaggerwrapper.make_tags(tagger.tag_text(text), allow_extra=True)

    # TODO: decide how an empty lemma should be handled (open question left
    # by the original author); it is currently kept as-is.
    lemmas = [
        # Keep only the first alternative of ambiguous lemmas ("a|b" -> "a").
        entry.lemma.partition("|")[0]
        for entry in parsed
        if isinstance(entry, treetaggerwrapper.Tag)
    ]
    return [" ".join(lemmas)]

# lemmatize() returns a list of strings; join it so every cell holds a plain
# string. Storing the list itself is what made the CSV show ['...'] with
# brackets and quotes.
csv_df['phrase_lemmatisée'] = csv_df['Verbatim'].apply(
    lambda verbatim: " ".join(lemmatize(verbatim))
)

所以任何帮助都会很棒!

4

0 回答 0