这是我为保存对象的历史所做的工作:
对于名为 history 的 Django 应用:
history/__init__.py:
"""
history/__init__.py
"""
from django.core import serializers
from django.utils import simplejson as json
from django.db.models.signals import pre_save, post_save
# from http://code.google.com/p/google-diff-match-patch/
from contrib.diff_match_patch import diff_match_patch
from history.models import History
def register_history(M):
    """
    Start keeping the history of the Django model class M.

    Example: after register_history(Document), every save of a Document
    runs the pre-save and post-save handlers of this module, which store
    the differences as history.
    """
    receivers = (
        (pre_save, _pre_handler),
        (post_save, _post_handler),
    )
    for model_signal, receiver in receivers:
        model_signal.connect(receiver, sender=M)
def _pre_handler(signal, sender, instance, **kwargs):
    """
    pre_save receiver: record the history of an object that is changing.

    sender is the model class and instance is the object about to be
    saved. The currently stored version of the object is loaded and its
    serialized fields are handed to _save_history as the "before" state.

    Nothing is recorded when the instance has no pk, or when no stored
    object with that pk exists yet; newly created objects are recorded
    by _post_handler instead.
    """
    if not instance.pk:
        return
    # A pk alone does not prove that a stored row exists: the pk may have
    # been assigned by hand before the first save. Treat a missing row as
    # "nothing to diff against" instead of letting DoesNotExist abort the
    # save.
    try:
        before = sender.objects.get(pk=instance.pk)
    except sender.DoesNotExist:
        return
    _save_history(instance, _serialize(before).get('fields'))
def _post_handler(signal, sender, instance, created, **kwargs):
    """
    post_save receiver: record the history of a newly created object.

    Creation is handled after the save because the object has no pk
    before it. Saves of already existing objects are ignored here.
    """
    if created:
        # A new object has no previous state, hence the empty "before".
        _save_history(instance, {})
def _serialize(instance):
    """
    Return the serialized form of a single Django model instance.

    The instance is run through the "python" serializer as a one-element
    list and the only resulting entry is returned.
    """
    serialized = serializers.serialize("python", [instance])
    return serialized[0]
def _save_history(instance, before):
    """
    Store a History row describing how instance differs from before.

    instance -- the model instance in its new state.
    before   -- dict of the previously serialized field values; empty
                (or None) for an object that has no previous state.

    For every field whose value changed, the stored diff holds either a
    diff_match_patch diff (when both the old and the new value are
    strings) or the old value itself. A field missing on one side is
    treated as False. The diff is saved as JSON text.
    """
    # Local import: brings in the encoder without touching the module's
    # import block.
    from django.core.serializers.json import DjangoJSONEncoder

    before = before or {}
    after = _serialize(instance).get('fields', {})
    dmp = diff_match_patch()
    diff = {}
    # Walk over every field known to either side.
    for field in set(before) | set(after):
        field_before = before.get(field, False)
        field_after = after.get(field, False)
        if field_before == field_after:
            continue
        # diff_main only works on two strings; a text value that becomes
        # None (or disappears) must fall back to storing the old value.
        if (isinstance(field_before, (str, unicode))
                and isinstance(field_after, (str, unicode))):
            # a patch
            diff[field] = dmp.diff_main(field_before, field_after)
        else:
            diff[field] = field_before
    # The serialized fields may contain dates, times or decimals, which
    # the plain JSON encoder rejects; DjangoJSONEncoder handles them.
    history = History(history_for=instance,
                      diff=json.dumps(diff, cls=DjangoJSONEncoder))
    history.save()
history/models.py:
"""
history/models.py
"""
from django.db import models
from django.contrib.contenttypes.models import ContentType
from django.contrib.contenttypes import generic
from contrib import diff_match_patch as diff
class History(models.Model):
    """
    Retain the history of generic objects, e.g. documents, people, etc..

    Each row points at one object through a generic foreign key and
    holds one recorded diff for it.
    """
    # Generic relation to the object this history entry belongs to;
    # both parts are nullable.
    content_type = models.ForeignKey(ContentType, null=True)
    object_id = models.PositiveIntegerField(null=True)
    history_for = generic.GenericForeignKey('content_type', 'object_id')
    # The recorded diff as text (history/__init__.py stores JSON here).
    diff = models.TextField()

    def __unicode__(self):
        # %s instead of %d: object_id is nullable and pk is None until the
        # row has been saved, and %d raises TypeError on None.
        return "<History (%s:%s):%s>" % (
            self.content_type, self.object_id, self.pk)
希望这对某些人有所帮助;如有任何意见或建议,我将不胜感激。
请注意,这并没有解决我最关心的竞争条件。如果 _pre_handler 中的 "before = sender.objects.get(pk=instance.pk)" 在另一个实例保存之前、但在该实例已经更新历史记录之后被调用,而当前实例又先完成保存,就会产生“损坏的历史”(即顺序错乱)。值得庆幸的是,diff_match_patch 会尝试优雅地处理“非致命”的中断,但不能保证成功。
一种解决方案是保证原子性。不过,我不确定如何让上述存在竞争条件的操作(即 _pre_handler)在所有 Django 实例之间成为原子操作。也许可以使用一张 HistoryLock 表,或者内存中的共享哈希(memcached?)——大家有什么建议吗?
如前所述,另一种解决方案是协调算法。但是,并发保存可能存在“真正的”冲突,需要用户干预才能确定正确的协调。
显然,如何根据这些差异把历史重新拼接起来,并不包含在上述代码片段中。