我正在使用 mysqldb/python 将一些数据推送到 mysql 数据库中。
该脚本为数据解析一堆 XML 文件。
MySQL 服务器似乎退出并给我一个“#2002 - 服务器没有响应(或本地 MySQL 服务器的套接字未正确配置)”错误在事务中途 - 每次我运行它时都在不同的地方(所以我我假设它不是导致它崩溃的特定数据......)
它完美地工作,直到它达到〜12或13个文件,它给了我这个错误:
Error 2003: Can't connect to MySQL server on 'localhost' (10055)
Traceback (most recent call last):
File "sigFileParser.py", line 113, in <module>
doParser(sigfile_filename)
File "sigFileParser.py", line 106, in
doParser
doFormatsPush(packedFormats)
File "sigFileParser.py", line 27, in
doFormatsPush
sys.exit (1)
NameError: global name 'sys' is not defined
一旦发生错误,我就无法进入 MySQL 控制台或通过 PHOPmyadmin
如果我离开一段时间,我可以回到 MySQL
MySQL 表:
CREATE TABLE IF NOT EXISTS patterns
(Version int(3),
DateCreated DATETIME,
SigID int(4),
SigSpecificity CHAR(10),
ByteSeqReference CHAR(12),
MinFragLength int(4),
Position int(4),
SubSeqMaxOffset int(4),
SubSeqMinOffset int(4),
Pattern TEXT)
和
CREATE TABLE IF NOT EXISTS formats
(Version int(3),
DateCreated DATETIME,
FormatID int(4),
FormatName TEXT,
PUID TEXT,
FormatVersion TEXT,
FormatMIMEType TEXT,
InternalSignatureID int(4),
Extension TEXT,
HasPriorityOverFileFormatID int(4))
py代码
from lxml import etree
import re, os, MySQLdb
def doPatternPush(packedPatterns):
try:
db = MySQLdb.connect (host = "localhost", user = "root", passwd = "", db = "sigfiles")
c = db.cursor()
c.execute('''INSERT INTO sigfiles.patterns
(Version,DateCreated,SigID,SigSpecificity,ByteSeqReference,MinFragLength,Position,SubSeqMaxOffset,SubSeqMinOffset,Pattern)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)''', packedPatterns)
db.commit()
except MySQLdb.Error, e:
print "Error %d: %s" % (e.args[0], e.args[1])
sys.exit (1)
return (db)
def doFormatsPush(packedFormats):
try:
db = MySQLdb.connect (host = "localhost", user = "root", passwd = "", db = "sigfiles")
c = db.cursor()
c.execute('''INSERT INTO sigfiles.formats
(Version,DateCreated,FormatID,FormatName,PUID,FormatVersion,FormatMIMEType,InternalSignatureID,Extension,HasPriorityOverFileFormatID)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)''', packedFormats)
db.commit()
except MySQLdb.Error, e:
print "Error %d: %s" % (e.args[0], e.args[1])
sys.exit (1)
return(db)
def doParser(sigfile_filename):
tree = etree.parse(sigfile_filename)
root = tree.getroot()
attributes = root.attrib
if 'DateCreated' in root.attrib:
DateCreated = (attributes["DateCreated"])
if 'Version' in root.attrib:
Version = (attributes["Version"])
##--------- get internal sig details ------------------
for a in range (len(root[0])): #loops for sig ID
attributes = root[0][a].attrib
SigID=(attributes["ID"])
SigSpecificity = (attributes["Specificity"])
for b in range (len(root[0][a])): # loops for sequence pattern inside each sig
attributes = root[0][a][b].attrib
if 'Reference' in root[0][a][b].attrib:
ByteSeqReference = (attributes["Reference"])
else:
ByteSeqReference = "NULL"
attributes = root[0][a][b][0].attrib
if 'MinFragLength' in root[0][a][b][0].attrib:
MinFragLength=(attributes["MinFragLength"])
else:
MinFragLength=''
if 'Position' in root[0][a].attrib:
Position=(attributes["Position"])
else:
Position=''
if 'SubSeqMaxOffset' in root[0][a][b][0].attrib:
SubSeqMaxOffset=(attributes["SubSeqMaxOffset"])
else:
SubSeqMaxOffsee = ''
if 'SubSeqMinOffset' in root[0][a][b][0].attrib:
SubSeqMinOffset=(attributes["SubSeqMinOffset"])
else:
SubSeqMinOffset = ''
Pattern = root[0][a][b][0][0].text
packedPatterns = [Version,DateCreated,SigID,SigSpecificity,ByteSeqReference,MinFragLength,Position,SubSeqMaxOffset,SubSeqMinOffset,Pattern]
doPatternPush(packedPatterns)
##-------- get format ID details-------------
for a in range (len(root[1])):
attributes = root[1][a].attrib
if 'ID' in root[1][a].attrib:
FormatID = (attributes['ID'])
else:
FormatID = "NULL"
if 'Name' in root[1][a].attrib:
FormatName = (attributes['Name'])
else:
FormatName = "NULL"
if 'PUID' in root[1][a].attrib:
PUID = (attributes['PUID'])
else:
PUID = "NULL"
if 'Version' in root[1][a].attrib:
FormatVersion = (attributes['Version'])
else:
FormatVersion = "NULL"
if 'MIMEType' in root[1][a].attrib:
FormatMIMEType = (attributes['MIMEType'])
else:
FormatMIMEType = "NULL"
InternalSignatureID,Extension,HasPriorityOverFileFormatID = ('', 'NULL', '')
for b in range (len(root[1][a])): #extracts the tags for each format ID
tagType = root[1][a][b].tag
tagText = root[1][a][b].text
tagType = re.sub('{http://www.nationalarchives.gov.uk/pronom/SignatureFile}', '', tagType)
if tagType == 'InternalSignatureID':
InternalSignatureID = tagText
elif tagType == 'Extension':
Extension = tagText
HasPriorityOverFileFormatID = ''
else:
HasPriorityOverFileFormatID = tagText
Extension = 'NULL'
packedFormats = [Version,DateCreated,FormatID,FormatName,PUID,FormatVersion,FormatMIMEType,InternalSignatureID,Extension,HasPriorityOverFileFormatID]
doFormatsPush(packedFormats)
if __name__ == "__main__":
path = "C:\Users\NDHA\Desktop\droid sigs all"
for (path, dirs, files) in os.walk(path):
for file in files:
sigfile_filename = str(path)+"\\"+str(file)
doParser(sigfile_filename)
print sigfile_filename
db.close()
所有的 XML 都来自这里: http: //www.nationalarchives.gov.uk/aboutapps/pronom/droid-signature-files.htm