python - 解析嵌套字典（allen nlp hierplane_tree）

Question

我正在尝试解析 AllenNLP 预测器返回的 JSON 对象。我找到了一个可以提取所有子节点值的辅助函数，但我真正想做的是：给定一个实体（例如 “man”），从 JSON 对象的依存树中取出与该实体相关联的属性。

例句：“昨天我去公园散步的时候，看到一个穿蓝色衬衫的男人。”

依赖树有与实体相关联的穿着、蓝色、衬衫等。如何在该结构中为 man 取回关联的 JSON 块？我不确定如何修改我的辅助函数或开发另一个函数以从 JSON 输出中获取该块。任何帮助或建议将不胜感激。

AllenNLP 代码：

# Sentence to run through the dependency parser.
text = "When I was walking to the park yesterday, I saw a man wearing a blue shirt."
from allennlp.predictors.predictor import Predictor
# Imported but never referenced below — presumably needed so the parser model
# is registered with AllenNLP before loading; verify.
import allennlp_models.structured_prediction
# Load the biaffine dependency parser archive from the given URL.
predictor = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/biaffine-dependency-parser-ptb-2020.04.06.tar.gz")
# Run the predictor on the sentence; the result is indexed like a dict below.
tree = predictor.predict(sentence=text)

# Keep only the 'hierplane_tree' entry of the prediction.
tree = tree['hierplane_tree']
# Bare expression — presumably here to echo the value in a notebook/REPL.
tree

可以让我获取子值的辅助函数：

"""Extract nested values from a JSON tree."""


def json_extract(obj, key):
    """Return every non-container value stored under ``key`` in nested JSON.

    ``obj`` is walked depth-first, following dict values and list items in
    their iteration order. A value is collected only when it sits under
    ``key`` and is neither a dict nor a list; container values are descended
    into instead of being collected. Anything that is not a dict or list
    contributes nothing.
    """

    def walk(node):
        # Lazily yield matches so the traversal order defines the result order.
        if isinstance(node, dict):
            for name, child in node.items():
                if isinstance(child, (dict, list)):
                    yield from walk(child)
                elif name == key:
                    yield child
        elif isinstance(node, list):
            for element in node:
                yield from walk(element)

    return list(walk(obj))

该函数可以给我值：

# Collect every scalar value stored under the `word` key anywhere in the nested tree.
children = json_extract(tree, 'word')
print(children)

['walking', 'When', 'I', 'was', 'to', 'park', 'the', 'yesterday', ',', 'saw', 'I', 'man', 'a', 'wearing', 'shirt', 'a', 'blue', '.']

JSON 提取结果（当我提供 “man” 时，我希望得到的内容）：

{'word': 'man',
        'nodeType': 'dep',
        'attributes': ['NOUN'],
        'link': 'dep',
        'spans': [{'start': 51, 'end': 55}],
        'children': [{'word': 'a',
          'nodeType': 'det',
          'attributes': ['DET'],
          'link': 'det',
          'spans': [{'start': 49, 'end': 51}]},
         {'word': 'wearing',
          'nodeType': 'dep',
          'attributes': ['VERB'],
          'link': 'dep',
          'spans': [{'start': 55, 'end': 63}],
          'children': [{'word': 'shirt',
            'nodeType': 'dep',
            'attributes': ['NOUN'],
            'link': 'dep',
            'spans': [{'start': 70, 'end': 76}],
            'children': [{'word': 'a',
              'nodeType': 'dep',
              'attributes': ['DET'],
              'link': 'dep',
              'spans': [{'start': 63, 'end': 65}]},
             {'word': 'blue',
              'nodeType': 'dep',
              'attributes': ['ADJ'],
              'link': 'dep',
              'spans': [{'start': 65, 'end': 70}]}]}]}]}

JSON输出：

{'text': 'When I was walking to the park yesterday , I saw a man wearing a blue shirt .',
 'root': {'word': 'walking',
  'nodeType': 'root',
  'attributes': ['VERB'],
  'link': 'root',
  'spans': [{'start': 11, 'end': 19}],
  'children': [{'word': 'When',
    'nodeType': 'dep',
    'attributes': ['ADV'],
    'link': 'dep',
    'spans': [{'start': 0, 'end': 5}]},
   {'word': 'I',
    'nodeType': 'nsubj',
    'attributes': ['PRON'],
    'link': 'nsubj',
    'spans': [{'start': 5, 'end': 7}]},
   {'word': 'was',
    'nodeType': 'aux',
    'attributes': ['AUX'],
    'link': 'aux',
    'spans': [{'start': 7, 'end': 11}]},
   {'word': 'to',
    'nodeType': 'prep',
    'attributes': ['ADP'],
    'link': 'prep',
    'spans': [{'start': 19, 'end': 22}],
    'children': [{'word': 'park',
      'nodeType': 'pobj',
      'attributes': ['NOUN'],
      'link': 'pobj',
      'spans': [{'start': 26, 'end': 31}],
      'children': [{'word': 'the',
        'nodeType': 'det',
        'attributes': ['DET'],
        'link': 'det',
        'spans': [{'start': 22, 'end': 26}]}]}]},
   {'word': 'yesterday',
    'nodeType': 'tmod',
    'attributes': ['NOUN'],
    'link': 'tmod',
    'spans': [{'start': 31, 'end': 41}]},
   {'word': ',',
    'nodeType': 'dep',
    'attributes': ['PUNCT'],
    'link': 'dep',
    'spans': [{'start': 41, 'end': 43}],
    'children': [{'word': 'saw',
      'nodeType': 'dep',
      'attributes': ['VERB'],
      'link': 'dep',
      'spans': [{'start': 45, 'end': 49}],
      'children': [{'word': 'I',
        'nodeType': 'nsubj',
        'attributes': ['PRON'],
        'link': 'nsubj',
        'spans': [{'start': 43, 'end': 45}]},
       {'word': 'man',
        'nodeType': 'dep',
        'attributes': ['NOUN'],
        'link': 'dep',
        'spans': [{'start': 51, 'end': 55}],
        'children': [{'word': 'a',
          'nodeType': 'det',
          'attributes': ['DET'],
          'link': 'det',
          'spans': [{'start': 49, 'end': 51}]},
         {'word': 'wearing',
          'nodeType': 'dep',
          'attributes': ['VERB'],
          'link': 'dep',
          'spans': [{'start': 55, 'end': 63}],
          'children': [{'word': 'shirt',
            'nodeType': 'dep',
            'attributes': ['NOUN'],
            'link': 'dep',
            'spans': [{'start': 70, 'end': 76}],
            'children': [{'word': 'a',
              'nodeType': 'dep',
              'attributes': ['DET'],
              'link': 'dep',
              'spans': [{'start': 63, 'end': 65}]},
             {'word': 'blue',
              'nodeType': 'dep',
              'attributes': ['ADJ'],
              'link': 'dep',
              'spans': [{'start': 65, 'end': 70}]}]}]}]}]}]},
   {'word': '.',
    'nodeType': 'punct',
    'attributes': ['PUNCT'],
    'link': 'punct',
    'spans': [{'start': 76, 'end': 78}]}]},
 'nodeTypeToStyle': {'root': ['color5', 'strong'],
  'dep': ['color5', 'strong'],
  'nsubj': ['color1'],
  'nsubjpass': ['color1'],
  'csubj': ['color1'],
  'csubjpass': ['color1'],
  'pobj': ['color2'],
  'dobj': ['color2'],
  'iobj': ['color2'],
  'mark': ['color2'],
  'pcomp': ['color2'],
  'xcomp': ['color2'],
  'ccomp': ['color2'],
  'acomp': ['color2'],
  'aux': ['color3'],
  'cop': ['color3'],
  'det': ['color3'],
  'conj': ['color3'],
  'cc': ['color3'],
  'prep': ['color3'],
  'number': ['color3'],
  'possesive': ['color3'],
  'poss': ['color3'],
  'discourse': ['color3'],
  'expletive': ['color3'],
  'prt': ['color3'],
  'advcl': ['color3'],
  'mod': ['color4'],
  'amod': ['color4'],
  'tmod': ['color4'],
  'quantmod': ['color4'],
  'npadvmod': ['color4'],
  'infmod': ['color4'],
  'advmod': ['color4'],
  'appos': ['color4'],
  'nn': ['color4'],
  'neg': ['color0'],
  'punct': ['color0']},
 'linkToPosition': {'nsubj': 'left',
  'nsubjpass': 'left',
  'csubj': 'left',
  'csubjpass': 'left',
  'pobj': 'right',
  'dobj': 'right',
  'iobj': 'right',
  'pcomp': 'right',
  'xcomp': 'right',
  'ccomp': 'right',
  'acomp': 'right'}}

score 0 · Accepted Answer

这无疑需要优化和清理，但它确实使您能够通过感兴趣的项目（在本例中为 man）解析来自 AllenNLP 的依赖关系树。希望这可以帮助其他人。

通过提供键/值（以 word 作为键、man 作为值），即可从文本中得到结果：

辅助函数：

def get_entity_attributes(obj, key, value):
    """Collect every dict in a nested JSON-like structure where ``d[key] == value``.

    The structure is walked depth-first. A matching dict is recorded before
    its own nested containers are searched, so results are in pre-order.

    Args:
        obj: Nested combination of dicts and lists. Any other type yields
            no matches.
        key: Dictionary key to test on each dict encountered.
        value: Value that ``key`` must map to for the dict to be collected.

    Returns:
        A list of the matching dict objects themselves (not copies).
    """
    matches = []

    def extract(node):
        if isinstance(node, dict):
            # Test every dict, not only those that are list items: the
            # top-level object and dicts stored directly under a key (such as
            # a tree's root node) must be findable too.
            if key in node and value == node[key]:
                matches.append(node)
            for child in node.values():
                if isinstance(child, (dict, list)):
                    extract(child)
        elif isinstance(node, list):
            for item in node:
                extract(item)

    extract(obj)
    return matches

def parse_attributes(obj, key):
    """Gather the non-container values found under ``key`` anywhere in ``obj``.

    Dicts and lists are searched depth-first in iteration order. A dict entry
    whose value is itself a dict or list is descended into rather than
    collected, even when its name equals ``key``. Inputs that are neither
    dicts nor lists produce an empty list.
    """
    collected = []
    if isinstance(obj, dict):
        for name, child in obj.items():
            if isinstance(child, (dict, list)):
                collected.extend(parse_attributes(child, key))
            elif name == key:
                collected.append(child)
    elif isinstance(obj, list):
        for element in obj:
            collected.extend(parse_attributes(element, key))
    return collected

# Drop stop words and punctuation from a list of word tokens
def get_clean_list(entities):
    """Return the tokens in ``entities`` that are neither stop words nor punctuation.

    Each token is looked up in ``nlp.vocab`` and kept only when the resulting
    entry has both ``is_stop`` and ``is_punct`` falsy. ``nlp`` is a
    module-level name that this snippet does not define (presumably a loaded
    spaCy pipeline — confirm before running).
    """

    def keep(token):
        entry = nlp.vocab[token]
        return not (entry.is_stop or entry.is_punct)

    return [token for token in entities if keep(token)]

查看输出：

# Parse the example sentence and list the words attached to one entity.
text = "When I was walking to the park yesterday, I saw a man wearing a blue shirt."
tree = predictor.predict(sentence=text)

key = "word"
entity = "man"
entities = get_entity_attributes(tree, key, entity)

for ent in entities:
    # Only the printed label differs between 'dep' nodes and all other nodes.
    label = 'Attributes' if ent['nodeType'] == 'dep' else 'Action Attributes'
    attributes = parse_attributes(ent, key)
    clean_attributes = get_clean_list(attributes)
    # The matched node's own word is part of its subtree; drop it so that only
    # the dependent words remain. Guard the removal: if the entity itself was
    # filtered out as a stop word or punctuation, list.remove would raise
    # ValueError.
    if entity in clean_attributes:
        clean_attributes.remove(entity)
    print(f'entity: {entity} {label}: {clean_attributes}')

给你：

entity: man Attributes: ['wearing', 'shirt', 'blue']

python - 解析嵌套字典（allen nlp hierplane_tree）

1 回答 1

Related

Reference