python - 难以创建 lxml 元素子类

Question

我正在尝试创建 Element 类的子类，但刚开始就遇到了问题。

from lxml import etree
try:
    import docx
except ImportError:
    from docx import docx

class File(etree.ElementBase):
    """Element subclass that tries to attach a 'body' child to itself on init."""
    def _init(self):
        # Chain to the base-class initialisation hook first.
        etree.ElementBase._init(self)
        # Build a 'body' element via docx.makeelement and append it as a child.
        # NOTE(review): lxml's append() returns None, so self.body presumably
        # ends up as None rather than the new element -- confirm.
        # NOTE(review): no 'document' element is created here; the File
        # instance itself is what later gets handed to docx.savedocx.
        self.body = self.append(docx.makeelement('body'))

# Instantiate the custom element; it is passed to savedocx as the document.
f = File()
# Build the remaining parts that savedocx takes, using the docx helpers.
relationships = docx.relationshiplist()
# Metadata values forwarded to docx.coreproperties below.
title    = 'File' 
subject  = 'A very special File'
creator  = 'Me'
keywords = ['python', 'Office Open XML', 'Word']
coreprops = docx.coreproperties(title=title, subject=subject, creator=creator,
    keywords=keywords)
appprops = docx.appproperties()
contenttypes = docx.contenttypes()
websettings = docx.websettings()
wordrelationships = docx.wordrelationships(relationships)
# Hand everything to savedocx; 'file.docx' is presumably the output path.
docx.savedocx(f, coreprops, appprops, contenttypes, websettings,
wordrelationships, 'file.docx')

当我尝试打开从此代码输出的文档时，我的 Word 版本（带有兼容包的 2003）给我以下错误：“此文件是由 Word 2007 的早期测试版创建的，无法在此版本中打开。” 当我将 File 对象替换为使用 docx.newdocument() 创建的不同元素时，文档会正常显示。有什么想法/建议吗？

score 0 · Accepted Answer

我真的不明白为什么要使用名为 File 的单独类。

正如 Michael0x2a 所指出的，您没有创建 document 标签（即包含 body 的根元素），因此生成的文件无法打开（我认为 Word 2007 也无法读取您的文件）。

以下是更正后的代码：

from lxml import etree
try:
    import docx
except ImportError:
    from docx import docx

class File(object):
    """Holder for a freshly built ``document`` element containing a ``body``.

    The root element is exposed as ``self.document`` so that it can be passed
    to ``docx.savedocx``.
    """

    @staticmethod
    def makeelement(tagname, tagtext=None, nsprefix='w', attributes=None,
                    attrnsprefix=None):
        '''Create an element & return it

        Declared as a staticmethod so that it works both as
        ``File.makeelement(...)`` and as ``self.makeelement(...)``; without
        the decorator the instance would be bound to ``tagname``.

        tagname      -- local name of the element to create
        tagtext      -- optional text content for the element
        nsprefix     -- namespace prefix for the tag, a list of prefixes
                        (the first one is used for the tag), or None/''
                        for no namespace
        attributes   -- optional dict mapping attribute name to value
        attrnsprefix -- optional namespace prefix for the attributes

        Raises KeyError if a prefix is missing from the prefix table.
        '''
        # The prefix -> namespace URI table is defined by the docx module,
        # not in this file, so it has to be looked up there.
        nsprefixes = docx.nsprefixes

        # Deal with list of nsprefix by making namespacemap
        namespacemap = None
        if isinstance(nsprefix, list):
            namespacemap = {}
            for prefix in nsprefix:
                namespacemap[prefix] = nsprefixes[prefix]
            # FIXME: rest of code below expects a single prefix
            nsprefix = nsprefix[0]
        if nsprefix:
            namespace = '{' + nsprefixes[nsprefix] + '}'
        else:
            # For when namespace = None
            namespace = ''
        newelement = etree.Element(namespace + tagname, nsmap=namespacemap)
        # Add attributes with namespaces
        if attributes:
            # If they haven't bothered setting attribute namespace, use an empty
            # string (equivalent of no namespace)
            if not attrnsprefix:
                # Quick hack: it seems every element that has a 'w' nsprefix for
                # its tag uses the same prefix for its attributes
                if nsprefix == 'w':
                    attributenamespace = namespace
                else:
                    attributenamespace = ''
            else:
                attributenamespace = '{' + nsprefixes[attrnsprefix] + '}'

            for name, value in attributes.items():
                newelement.set(attributenamespace + name, value)
        if tagtext:
            newelement.text = tagtext
        return newelement

    def __init__(self):
        """Build the ``document`` root element and give it a ``body`` child."""
        super(File, self).__init__()
        self.document = self.makeelement('document')
        self.document.append(self.makeelement('body'))


# Build the File wrapper; its .document attribute is what gets saved.
f = File()

# Relationship list, later fed into docx.wordrelationships.
relationships = docx.relationshiplist()

# Metadata values forwarded to docx.coreproperties.
title = 'File'
subject = 'A very special File'
creator = 'Me'
keywords = ['python', 'Office Open XML', 'Word']
coreprops = docx.coreproperties(
    title=title,
    subject=subject,
    creator=creator,
    keywords=keywords
)

# Remaining parts taken by docx.savedocx.
appprops = docx.appproperties()
contenttypes = docx.contenttypes()
websettings = docx.websettings()
wordrelationships = docx.wordrelationships(relationships)

# Hand the document element and all parts to savedocx.
docx.savedocx(
    f.document,
    coreprops,
    appprops,
    contenttypes,
    websettings,
    wordrelationships,
    'file.docx'
)

python - 难以创建 lxml 元素子类

1 个回答

Related

Reference