我使用此脚本生成必须采用某种格式的自定义 XML 文件。它查询数据库并将结果转换为一个大的 xml 文件。我对从库存零件清单到员工记录的多个数据库执行此操作。
import csv
import StringIO
import time
import MySQLdb
import lxml.etree
import lxml.builder
from datetime import datetime
import string
from lxml import etree
from lxml.builder import E as buildE
from datetime import datetime
from time import sleep
import shutil
import glob
import os
import logging
def logWrite(message):
logging.basicConfig(
filename="C:\\logs\\XMLSyncOut.log",
level=logging.DEBUG,
format='%(asctime)s %(message)s',
datefmt='%m/%d/%Y %I:%M:%S: %p'
)
logging.debug(message)
def buildTag(tag,parent=None,content=None):
element = buildE(tag)
if content is not None:
element.text = unicode(content)
if parent is not None:
parent.append(element)
return element
def fetchXML(cursor):
logWrite("constructing XML from cursor")
fields = [x[0] for x in cursor.description]
doc = buildTag('DATA')
for record in cursor.fetchall():
r = buildTag('ROW',parent=doc)
for (k,v) in zip(fields,record):
buildTag(k,content=v,parent=r)
return doc
def updateDatabase 1():
try:
conn = MySQLdb.connect(host = 'host',user = 'user',passwd = 'passwd',db = 'database')
cursor = conn.cursor()
except:
sys.exit(1)
logWrite("Cannot connect to database - quitting!")
cursor.execute("SELECT * FROM database.table")
logWrite("Dumping fields from database.table into cursor")
xmlFile = open("results.xml","w")
doc = fetchXML(cursor)
xmlFile.write(etree.tostring(doc,pretty_print=True))
logWrite("Writing XML results.xml")
xmlFile.close()
出于某种原因,我从 Excel 电子表格导入的一个新数据库出现了某种类型的编码错误,而其他数据库则没有。这是错误
element.text = unicode(content)
UnicodeDecodeError: 'ascii' codec can't decode byte 0x96 in position 21: ordinal not in range(128)
我尝试通过将 buildTag 函数更改为如下所示显式编码为 ascii:
def buildTag(tag,parent=None,content=None):
element = buildE(tag)
if content is not None:
content = str(content).encode('ascii','ignore')
element.text = content
if parent is not None:
parent.append(element)
return element
这仍然没有奏效。
关于我能做些什么来阻止这种情况的任何想法?我无法逃脱它们,因为我不能将“\x92”作为输出显示在记录中。