2

我想创建一个 QAbstractItemModel,让它从一系列 XML 文件中获取数据,所有文件都位于同一目录中。由于 PyQt5 不再支持 QDomDocument(或者至少我找不到让它工作的方法),我不得不改用 QXmlStreamReader。我把数据本身放在一个巨大的 Python 字典中(嗯……按计算机科学的标准并不算巨大),字典的各个键下又包含其他字典,从而形成树状结构。

到目前为止,这是我的代码:

class DataModel(QtCore.QAbstractItemModel):
    """Model that reads every ``*.xml`` file of a directory into ``self.data``.

    All work happens in ``__init__``: each file is walked token by token with
    ``QXmlStreamReader`` and the values found are written into one dictionary,
    which is printed at the end.
    """
    def __init__(self, settingsDirectory, parent = None):
        """Parse the XML files found in ``settingsDirectory``.

        Args:
            settingsDirectory: object providing ``setNameFilters``,
                ``entryList`` and ``absolutePath`` (presumably a ``QDir`` --
                confirm against the caller). Its name filter is set to
                ``*.xml`` as a side effect.
            parent: optional parent passed to the base-class constructor.
        """
        super(DataModel, self).__init__(parent)
        settingsDirectory.setNameFilters(["*.xml"])
        files = settingsDirectory.entryList()
        print(files)

        # Single result dictionary shared by all files. Every write below
        # targets a top-level key of this dict.
        # NOTE(review): QAbstractItemModel also has a data() method; this
        # attribute presumably hides it on the instance -- confirm.
        self.data = {}

        for i in range(len(files)):
            filePath = str(files[i])
            file = QtCore.QFile(settingsDirectory.absolutePath() + "/" + str(filePath))
            fileOpens = file.open(file.ReadOnly | file.Text)
            if fileOpens:
                parser = QtCore.QXmlStreamReader(file)
                print("--------Beginning parsing----------")
                print("Reading file: "+str(filePath))
                while not parser.atEnd():
                    parser.readNext()

                    token = parser.tokenType()

                    # Debug trace, printed for every token regardless of type.
                    print("Reading tag: " + str(parser.name()))
                    print("Tag type is: " + str(token))
                    if token == parser.StartDocument:
                        self.data["XML Version"] = str(parser.documentVersion())
                        self.data["XML Encoding"] = str(parser.documentEncoding())
                    if token == parser.StartElement:
                        # Only the most recently opened tag is remembered; the
                        # name of an enclosing tag is overwritten here.
                        tokenName = parser.name()
                    # Same value as `token`: no readNext() happened in between.
                    if parser.tokenType() == parser.Characters:
                        # Overwritten by every Characters token.
                        # NOTE(review): whitespace-only text between tags
                        # presumably arrives here as well -- confirm.
                        tokenText = parser.text()
                        print("This tag has a text value: " + str(tokenText))
                        print("current data: " + str(self.data))
                    if token == parser.EndElement:
                        # tokenName and tokenText are first bound inside this
                        # loop; reaching this branch before both a StartElement
                        # and a Characters token were seen raises
                        # UnboundLocalError.
                        if tokenText != None:
                            # Written at the top level of self.data, never into
                            # a parent's dictionary, so no nesting is produced.
                            self.data[tokenName] = tokenText
                        else:
                            self.data[tokenName] = {}
                        # After this reset, the closing tag of an enclosing
                        # element (no StartElement in between) is stored under
                        # the key None.
                        tokenName = None
                        tokenText = None
            else:
                print(self.tr("xml file did not open properly"))
        print(self.data)

虽然这段代码没有崩溃或任何事情,但它确实有一些我不知道为什么会发生或如何修复的问题:

1. 由于某种原因,tokenName 始终是 None,从不改变 —— 已解决

2.self.data字典的结构没有变成树状的,不知道为什么:|

示例数据:

<?xml version="1.0" encoding="UTF-8"?>
<tag>
    <description>This is a text</description>
    <types>
        <typesAllowed></typesAllowed>
        <typesEnabled></typesEnabled>
    </types>
</tag>

产生最终结果:

{'XML Encoding': 'UTF-8', 'XML Version': '1.0', 'typesAllowed': '\n\t\t', None: '\n', 'typesEnabled': '\n\t\t', 'description': 'This is a text'}

而不是想要的:

{'XML Encoding': 'UTF-8', 'XML Version': '1.0', 'tag': {'description': 'this is a text', 'typesAllowed': '\n\t\t', 'typesEnabled': '\n\t\t'}}

我知道这些问题很可能是由于我对 StreamReader 的工作原理了解不足造成的,因此欢迎提供任何提示 :)

编辑1:

tokenName改变是一个愚蠢的定位错误,我很傻。代码反映了修复。

编辑2:

添加了示例和示例输出

4

1 回答 1

0

这个问题现在解决了;我对这个问题采取了不同的方法。

我基本上使用了一个列表:如果 StartElement 标记的 parseAs 属性 == "element",我就把元组 (name, {}) 添加到列表中;并把求值后的字符串(由 parseText 函数处理)放入最后一个元组的字典中。当它遇到 EndElement 标记时,它会找到 name == tokenName(即当前标记的名称)的元组,并把它作为以 name 为键的条目放入前一个元组的字典中。

关于它的工作方式还有更多细节(例如它如何知道何时把 currData 提交到 self.data 等),但如果把它们都写进来,我的解释可能会变得过于复杂。

class DataModel(QtCore.QAbstractItemModel):
    def __init__(self, settingsDirectory, parent = None):
        """Build the model by parsing every ``*.xml`` file in a directory.

        Each file is read with ``QXmlStreamReader``. Elements carrying
        ``parseAs="element"`` become nested dictionaries; elements carrying
        ``parseAs="text"`` become leaf values converted by ``parseText``.
        The finished tree of a file is stored in ``self.data`` under the name
        of its root element, and the XML version and encoding of each file are
        recorded in ``self.parsingLog``.

        Args:
            settingsDirectory: object providing ``setNameFilters``,
                ``entryList`` and ``absolutePath`` (presumably a ``QDir`` --
                confirm against the caller). Its name filter is set to
                ``*.xml`` as a side effect.
            parent: optional parent passed to the base-class constructor.
        """
        super(DataModel, self).__init__(parent)
        settingsDirectory.setNameFilters(["*.xml"])
        files = settingsDirectory.entryList()
        print(files)

        # NOTE(review): QAbstractItemModel also has a data() method; this
        # attribute presumably hides it on the instance -- confirm before
        # attaching the model to a view.
        self.data = {}
        self.parsingLog = {}

        for entry in files:
            filePath = str(entry)
            file = QtCore.QFile(settingsDirectory.absolutePath() + "/" + filePath)
            if not file.open(file.ReadOnly | file.Text):
                print(self.tr("xml file did not open properly"))
                continue
            try:
                self._parseFile(file, filePath)
            finally:
                # Release the handle even if parsing terminates early.
                file.close()

    def _parseFile(self, file, filePath):
        """Parse one opened XML file and commit its root element to self.data."""
        parser = QtCore.QXmlStreamReader(file)

        # Stack of (name, dict) tuples, one per currently open
        # parseAs="element" tag; the last entry is the innermost one.
        currData = []

        print(self.tr("--------Beginning parsing--------"))
        print(self.tr("Reading file: "+str(filePath)))
        print(self.tr("---------------------------------"))

        while not parser.atEnd():
            parser.readNext()
            token = parser.tokenType()

            print(self.tr("--------------------"))
            print(self.tr("Token type: " + str(self.printTokenType(token))))

            if token == parser.StartElement:
                self._handleStartElement(parser, currData)

            elif token == parser.EndElement:
                self._handleEndElement(parser, currData)

            elif token == parser.Characters:
                print(self.tr("Characters value: " + str(parser.text())))
                print(self.tr("--------------------"))

            elif token == parser.StartDocument:
                logEntry = {
                    "XML Version": str(parser.documentVersion()),
                    "XML Encoding": str(parser.documentEncoding()),
                }
                self.parsingLog["File: "+str(filePath)] = logEntry
                print(self.tr("File Version: " + str(logEntry["XML Version"])))
                print(self.tr("File Encoding: " + str(logEntry["XML Encoding"])))

            elif token == parser.EndDocument:
                print(self.tr("Cleaning up"), end = "")
                print(self.tr(".") * 5, end = "")
                print(self.tr("done."))
                print(self.tr("self.data: " + str(self.data)))
                print(self.tr("--------------------"))

        # atEnd() is also true once an error has aborted reading, so the
        # error state has to be inspected after the loop, not inside it.
        if parser.hasError():
            print(self.tr("XML file is not well-formatted"))
            print(self.tr("Line " + str(parser.lineNumber()) + ": " + str(parser.errorString())))

    def _handleStartElement(self, parser, currData):
        """Open a nested dict or store a text leaf, depending on parseAs."""
        tokenName = parser.name()
        parseAs = parser.attributes().value("parseAs")

        print(self.tr("Reading StartElement: " + str(tokenName)))
        print(self.tr("parseAs: " + str(parseAs)))

        if parseAs == "text":
            # readElementText() consumes the matching end tag as well, so no
            # EndElement token is delivered for text elements.
            textValue = self.parseText(parser.readElementText())
            print(self.tr("Text Value: " + str(textValue)))

            if not currData:
                # A text leaf needs an enclosing element to be stored in.
                print(self.tr("*******Terminating application*******"))
                print(self.tr("Reason: currData is empty"))
                print(self.tr("*******Terminating application*******"))
                sys.exit()
            currData[-1][1][tokenName] = textValue
        elif parseAs == "element":
            currData.append((tokenName, {}))
        else:
            print(self.tr("******WARNING******"))
            print(self.tr("parseAs attribute is not given correctly"))
            print(self.tr("******WARNING******"))

        print(self.tr("--------------------"))

    def _handleEndElement(self, parser, currData):
        """Close the innermost open element and attach it to its parent."""
        tokenName = parser.name()

        print(self.tr("Reading EndElement: " + str(tokenName)))
        print(self.tr("currData before: " + str(currData)))

        # End tags of elements that were never pushed (missing or unknown
        # parseAs) have nothing to commit. Matching is by name only, so such
        # an element sharing its name with the innermost open element would
        # still be treated as that element's end tag.
        if not currData or currData[-1][0] != tokenName:
            return

        print(self.tr("Closing token: " + str(tokenName)))
        name, children = currData.pop()

        if currData:
            currData[-1][1][name] = children
            print(self.tr("currData after: " + str(currData)))
            print(self.tr("--------------------"))
        else:
            # The stack is empty again: this was the root element.
            print(self.tr("This is the final token, writing to self.data"), end = "")
            self.data[name] = children
            print(self.tr(".") * 5, end = "")
            print(self.tr("done."))
            print(self.tr("--------------------"))

    def parseText(self, text):
        """Convert element text to ``int`` or ``float``, or leave it a ``str``.

        Only the ASCII characters ``0-9`` and ``.`` count as numeric, so signs,
        exponents and surrounding whitespace make the value a string.

        Args:
            text: the raw text of an XML element.

        Returns:
            ``int`` if ``text`` consists solely of digits, ``float`` if it
            consists of digits and dots and is a valid float literal,
            otherwise ``text`` unchanged (including the empty string and
            dotted values such as ``"1.2.3"`` that are not valid floats).

        Raises:
            ValueError: if ``text`` is not a ``str``.
        """
        if not isinstance(text, str):
            raise ValueError("parseText expects a str, got " + type(text).__name__)

        digits = "0123456789"
        numericChars = digits + "."

        if text == "" or any(ch not in numericChars for ch in text):
            return text
        if all(ch in digits for ch in text):
            return int(text)
        try:
            return float(text)
        except ValueError:
            # Digits and dots only, but not a number (e.g. "." or "1.2.3").
            return text

    def printTokenType(self, token):
        """Return the name of a ``QXmlStreamReader`` token type for logging.

        Args:
            token: a value as returned by ``QXmlStreamReader.tokenType()``.

        Returns:
            The token type's name as a string, or ``None`` if ``token`` does
            not equal any of the known token types.
        """
        reader = QtCore.QXmlStreamReader
        tokenNames = (
            (reader.NoToken, "NoToken"),
            (reader.Invalid, "Invalid"),
            (reader.StartDocument, "StartDocument"),
            (reader.EndDocument, "EndDocument"),
            (reader.StartElement, "StartElement"),
            (reader.EndElement, "EndElement"),
            (reader.Characters, "Characters"),
            (reader.Comment, "Comment"),
            (reader.DTD, "DTD"),
            (reader.EntityReference, "EntityReference"),
            (reader.ProcessingInstruction, "ProcessingInstruction"),
        )
        for value, label in tokenNames:
            if token == value:
                return label
        return None
于 2014-09-04T13:22:40.687 回答