sbs
我正在为 IBM Rhapsody文件格式构建解析器。但不幸的是,递归部分不会按预期工作。该规则pp.Word(pp.printables + " ")
可能是问题,因为它也匹配;
和{}
。但至少;
也可以是值的一部分。
import pyparsing as pp
import pprint
TEST = r"""{ foo
- key = bla;
- value = 1243; 1233; 1235;
- _hans = "hammer
time";
- HaMer = 765; 786; 890;
- value = "
#pragma LINK_INFO DERIVATIVE \"mc9s12xs256\"
";
- _mText = 12.11.2015::13:20:0;
- value = "war"; "fist";
- _obacht = "fish,car,button";
- _id = gibml c0d8-4535-898f-968362779e07;
- bam = { boing
- key = bla;
}
{ boing
- key = bla;
}
}
"""
def flat(loc, toks):
if len(toks[0]) == 1:
return toks[0][0]
assignment = pp.Suppress("-") + pp.Word(pp.alphanums + "_") + pp.Suppress("=")
value = pp.OneOrMore(
pp.Group(assignment + (
pp.Group(pp.OneOrMore(
pp.QuotedString('"', escChar="\\", multiline=True) +
pp.Suppress(";"))).setParseAction(flat) |
pp.Word(pp.alphas) + pp.Suppress(";") |
pp.Word(pp.printables + " ")
))
)
expr = pp.Forward()
expr = pp.Suppress("{") + pp.Word(pp.alphas) + (
value | (assignment + expr) | expr
) + pp.Suppress("}")
expr = expr.ignore(pp.pythonStyleComment)
print TEST
pprint.pprint(expr.parseString(TEST).asList())
输出:
% python prase.py
{ foo
- key = bla;
- value = 1243; 1233; 1235;
- _hans = "hammer
time";
- HaMer = 765; 786; 890;
- value = "
#pragma LINK_INFO DERIVATIVE \"mc9s12xs256\"
";
- _mText = 12.11.2015::13:20:0;
- value = "war"; "fist";
- _obacht = "fish,car,button";
- _id = gibml c0d8-4535-898f-968362779e07;
- bam = { boing
- key = bla;
}
{ boing
- key = bla;
}
}
['foo',
['key', 'bla'],
['value', '1243; 1233; 1235;'],
['_hans', 'hammer\n time'],
['HaMer', '765; 786; 890;'],
['value', '\n #pragma LINK_INFO DERIVATIVE "mc9s12xs256"\n '],
['_mText', '12.11.2015::13:20:0;'],
['value', ['war', 'fist']],
['_obacht', 'fish,car,button'],
['_id', 'gibml c0d8-4535-898f-968362779e07;'],
['bam', '{ boing'],
['key', 'bla']]