我有一张名为 learning 的表,里面保存了已经找到的重复记录:互为重复的记录具有相同的 entity_id。
我想用这些已知的重复记录作为示例来训练 Dedupe,但运行时会出错。我究竟做错了什么?
# Open the PostgreSQL connection used to read the labelled examples.
# NOTE(review): RealDictCursor is expected to yield each row as a dict keyed
# by column name, which is the record shape handed to dedupe below.
con = psycopg2.connect(database=db_conf['NAME'],
                       user=db_conf['USER'],
                       password=db_conf['PASSWORD'],
                       host=db_conf['HOST'],
                       cursor_factory=psycopg2.extras.RealDictCursor)
con.set_client_encoding('UTF8')
c = con.cursor()

if os.path.exists(settings_file):
    # A settings file already exists: load the stored model, no training.
    print('reading from ', settings_file)
    with open(settings_file, 'rb') as sf:
        deduper = dedupe.StaticDedupe(sf, num_cores=4)
else:
    # Field definitions: two comparable fields plus their interaction term.
    fields = [
        {'field': 'name', 'variable name': 'name',
         'type': 'String', 'has missing': True},
        {'field': 'address', 'type': 'Text',
         'variable name': 'address', 'has missing': True},
        {'type': 'Interaction',
         'interaction variables': ['name', 'address']},
    ]
    deduper = dedupe.Dedupe(fields, num_cores=8)

    # Load the labelled rows; rows sharing an entity_id are known duplicates.
    LEARNING_QUERY = "SELECT entity_id, name, address FROM learning;"
    c.execute(LEARNING_QUERY)
    # Key each record by its position in the result set.
    temp_d = dict(enumerate(c))

    # FIX: train() dereferences deduper.active_learner, which is still None
    # at that point in the original script (this is the AttributeError in the
    # traceback). In the dedupe 1.x API the active learner is created by
    # sample(), so it has to run before markPairs()/train().
    deduper.sample(temp_d)

    # Turn the entity_id grouping into match/distinct training pairs.
    deduper.markPairs(dedupe.trainingDataDedupe(temp_d, 'entity_id'))
    del temp_d  # the raw rows are not referenced again in this script

    deduper.train()
错误:
/home/sontata/venv/bin/python /home/sontata/PycharmProjects/test/actual_object_learning.py
INFO:root:Generating grammar tables from /usr/lib/python3.6/lib2to3/Grammar.txt
INFO:root:Generating grammar tables from /usr/lib/python3.6/lib2to3/PatternGrammar.txt
starting active labeling...
/home/sontata/venv/lib/python3.6/site-packages/rlr/lr.py:39: UserWarning: The line-search routine reaches the maximum number of evaluations.
case_weights, self.alpha))
Traceback (most recent call last):
File "/home/sontata/PycharmProjects/test/actual_object_learning.py", line 110, in <module>
deduper.train()
File "/home/sontata/venv/lib/python3.6/site-packages/dedupe/api.py", line 678, in train
self.predicates = self.active_learner.learn_predicates(
AttributeError: 'NoneType' object has no attribute 'learn_predicates'
Process finished with exit code 1