这是 ResolutionProver 的快速修复。
导致证明者不健全的问题是,当存在多个互补文字时,它没有正确实现解析规则。例如,给定子句{A B C}
和{-A -B D}
二进制解析将产生子句{A -A C D}
和{B -B C D}
. 两者都将作为重言式被丢弃。当前的 NLTK 实现将生成{C D}
.
这可能是因为子句在 NLTK 中表示为列表而引入的,因此相同的文字可能在一个子句中出现不止一次。当应用于子句{A A}
and时,此规则确实会正确生成一个空子句{-A -A}
,但通常此规则是不正确的。
似乎如果我们保持从句不重复相同的文字,我们可以通过一些改变来恢复健全性。
首先定义一个删除相同文字的函数。
这是此类功能的天真实现
import nltk.inference.resolution as res
def _simplify(clause):
"""
Remove duplicate literals from a clause
"""
duplicates=[]
for i,c in enumerate(clause):
if i in duplicates:
continue
for j,d in enumerate(clause[i+1:],start=i+1):
if j in duplicates:
continue
if c == d:
duplicates.append(j)
result=[]
for i,c in enumerate(clause):
if not i in duplicates:
result.append(clause[i])
return res.Clause(result)
现在我们可以将这个函数插入到nltk.inference.resolution
模块的一些函数中。
def _iterate_first_fix(first, second, bindings, used, skipped, finalize_method, debug):
"""
This method facilitates movement through the terms of 'self'
"""
debug.line('unify(%s,%s) %s'%(first, second, bindings))
if not len(first) or not len(second): #if no more recursions can be performed
return finalize_method(first, second, bindings, used, skipped, debug)
else:
#explore this 'self' atom
result = res._iterate_second(first, second, bindings, used, skipped, finalize_method, debug+1)
#skip this possible 'self' atom
newskipped = (skipped[0]+[first[0]], skipped[1])
result += res._iterate_first(first[1:], second, bindings, used, newskipped, finalize_method, debug+1)
try:
newbindings, newused, unused = res._unify_terms(first[0], second[0], bindings, used)
#Unification found, so progress with this line of unification
#put skipped and unused terms back into play for later unification.
newfirst = first[1:] + skipped[0] + unused[0]
newsecond = second[1:] + skipped[1] + unused[1]
# We return immediately when `_unify_term()` is successful
result += _simplify(finalize_method(newfirst,newsecond,newbindings,newused,([],[]),debug))
except res.BindingException:
pass
return result
res._iterate_first=_iterate_first_fix
同样更新 res._iterate_second
def _iterate_second_fix(first, second, bindings, used, skipped, finalize_method, debug):
"""
This method facilitates movement through the terms of 'other'
"""
debug.line('unify(%s,%s) %s'%(first, second, bindings))
if not len(first) or not len(second): #if no more recursions can be performed
return finalize_method(first, second, bindings, used, skipped, debug)
else:
#skip this possible pairing and move to the next
newskipped = (skipped[0], skipped[1]+[second[0]])
result = res._iterate_second(first, second[1:], bindings, used, newskipped, finalize_method, debug+1)
try:
newbindings, newused, unused = res._unify_terms(first[0], second[0], bindings, used)
#Unification found, so progress with this line of unification
#put skipped and unused terms back into play for later unification.
newfirst = first[1:] + skipped[0] + unused[0]
newsecond = second[1:] + skipped[1] + unused[1]
# We return immediately when `_unify_term()` is successful
result += _simplify(finalize_method(newfirst,newsecond,newbindings,newused,([],[]),debug))
except res.BindingException:
#the atoms could not be unified,
pass
return result
res._iterate_second=_iterate_second_fix
最后,将我们的函数插入clausify()
以确保输入是无重复的。
def clausify_simplify(expression):
"""
Skolemize, clausify, and standardize the variables apart.
"""
clause_list = []
for clause in res._clausify(res.skolemize(expression)):
for free in clause.free():
if res.is_indvar(free.name):
newvar = res.VariableExpression(res.unique_variable())
clause = clause.replace(free, newvar)
clause_list.append(_simplify(clause))
return clause_list
res.clausify=clausify_simplify
应用这些更改后,证明者应该运行标准测试并正确处理parentof/childof
关系。
print res.ResolutionProver().prove(q, [s], verbose=True)
输出:
[1] {-foo(Bar)} A
[2] {-parentof(z144,z143), childof(z143,z144)} A
[3] {parentof(z146,z145), -childof(z145,z146)} A
[4] {childof(z145,z146), -childof(z145,z146)} (2, 3) Tautology
[5] {-parentof(z146,z145), parentof(z146,z145)} (2, 3) Tautology
[6] {childof(z145,z146), -childof(z145,z146)} (2, 3) Tautology
False
更新:实现正确性并不是故事的结局。一种更有效的解决方案是将用于在Clause
类中存储文字的容器替换为基于内置 Python 基于哈希集的容器,但这似乎需要对证明者实现进行更彻底的返工并引入一些性能测试基础设施也是。