我是编程和使用 Python 的初学者。目前,我正在尝试理解 Jurafsky 和 Martin 2008 年关于语音和语言处理的书(关于句法解析的练习 13.1)中的一些代码。我将它复制到下面(除了最后4行,我没有自己编写此代码)。
我的问题很简单:我没有打印语法规则,而是得到如下输出:
set([<__main__.Rule object at 0x011E1810>, <__main__.Rule object at 0x011E1790>, <__main__.Rule object at 0x011E15F0>, ...)
我知道我应该对str (self) 做一些事情,但是我尝试了一些事情,但仍然没有得到正常的输出。我怀疑解决方案很简单,但我只是不知道该怎么做。很感谢任何形式的帮助。可能您不需要阅读和理解下面的所有代码即可查看哪些代码不起作用。
非常感谢!
def chomsky_normal_form(grammar):
grammar = set(grammar)
nonterminals = set(rule.head for rule in grammar)
# remove single symbol nonterminal rules
for rule, symbol in _unary_rules(grammar, nonterminals):
grammar.discard(rule)
for rule2 in _rules_headed_by(grammar, symbol):
grammar.add(Rule(rule.head, tuple(rule2.symbols)))
if all(symbol not in rule.symbols for rule in grammar):
for rule2 in _rules_headed_by(grammar, symbol):
grammar.discard(rule2)
# move terminals to their own rules
for rule in list(grammar):
if len(rule.symbols) >= 2:
for i, symbol in enumerate(rule.symbols):
if all(rule.head != symbol for rule in grammar):
rule = _new_symbol(grammar, rule, i, i + 1)
# ensure there are only two nonterminals per rule
for rule in _multi_symbol_rules(grammar):
_new_symbol(grammar, rule, 0, 2)
# return the grammar in CNF
return grammar
# find A -> B rules, allowing concurrent modifications
def _unary_rules(grammar, nonterminals):
while True:
g = ((rule, rule.symbols[0])
for rule in grammar
if len(rule.symbols) == 1
if rule.symbols[0] in nonterminals)
yield g.next()
# find all rules headed by the given symbol
def _rules_headed_by(grammar, symbol):
return [rule for rule in grammar if rule.head == symbol]
# create a new symbol which derives the given span of symbols
def _new_symbol(grammar, rule, start, stop):
symbols = rule.symbols
new_head = '_'.join(symbols[start:stop]).upper()
new_symbols = symbols[:start] + (new_head,) + symbols[stop:]
new_rule = Rule(rule.head, new_symbols)
grammar.discard(rule)
grammar.add(new_rule)
grammar.add(Rule(new_head, symbols[start:stop]))
return new_rule
# find A -> BCD... rules, allowing concurrent modifications
def _multi_symbol_rules(grammar):
while True:
g = (rule for rule in grammar if len(rule.symbols) >= 3)
yield g.next()
# representation of a rule A -> B...C
class Rule(object):
def __init__(self, head, symbols):
self.head = head
self.symbols = symbols
self._key = head, symbols
def __eq__(self, other):
return self._key == other._key
def __hash__(self):
return hash(self._key)
def __str__(self):
rep = grammar_cnf
return rep
# build a grammar from a string of lines like "X -> YZ | b"
def get_grammar(string):
grammar = set()
for line in string.splitlines():
head, symbols_str = line.split(' -> ')
for symbols_str in symbols_str.split(' | '):
symbols = tuple(symbols_str.split())
grammar.add(Rule(head, symbols))
return grammar
grammar = get_grammar("""S -> NP VP | Aux NP VP | VP
NP -> Pronoun | Proper-Noun | Det Nominal
Nominal -> Noun | Nominal Noun | Nominal PP
VP -> Verb | Verb NP | Verb NP PP | Verb PP | VP PP
PP -> Preposition NP
Det -> that | this | a
Noun -> book | flight | meal | money
Verb -> book | include | prefer
Pronoun -> I | she | me
Proper-Noun -> Houston | TWA
Aux -> does
Preposition -> from | to | on | near | through""")
grammar_cnf = chomsky_normal_form(grammar)
print(grammar_cnf)