1

我正在尝试在 python 中构建一个 2-3-4 树。到目前为止,插入似乎工作到高度为 3 左右的节点。在那之后,数据似乎被丢弃而不是被插入到树中。我不确定为什么会发生这种情况,并多次检查了我的代码。我已经在我得到插入算法的插入代码附近发表了评论。提前感谢您对我的问题的任何见解。

import re
class Node:
    def __init__(self, newInfo = None, p = None, q = None,
                 r = None, s = None, parent = None):
        #initilize parent / child links
        self.children = [p, q, r, s]
        self.parent = parent
        #initialize data 
        self.info = [ newInfo, None, None ]

class TwoThreeFourTree:
    def __init__(self):
        self.root = Node()

    # Built in sort sinks None types to front, which
    # is opposite of how info was structured to work,
    # Using this sort will push None values to back.
    def sortNode(self, curr):
        c = []
        for i in curr.info:
            if i is not None:
                c.append(i)
        c.sort()
        for i in range(3-len(c)):
            c.append(None)
        return c            

    # Built in len counts None, this one doesn't.
    def length(self, node):
        i = 0
        for info in node:
            if info is not None:
                i = i + 1
        return i

    def isLeaf(self, node):
        for child in node.children:
            if child is not None:
                return False
        return True

    def isOrphan(self, node):
        if node.parent is None:
            return True
        return False

    def lookup(self, userStr, reg):
        curr = self.root
        while curr is not None:
            #Two Node
            if self.length(curr.info) is 1:
                if re.match(reg, str(curr.info[0])) is not None:
                    return curr.info[0]
                else:
                    if userStr < curr.info[0]:
                        curr = curr.children[0]
                    else:
                        curr = curr.children[1]

            #Three Node
            elif self.length(curr.info) is 2:
                for item in curr.info:
                    if re.match(reg, str(item)) is not None:
                        return item
                if userStr < curr.info[0]:
                    curr = curr.children[0]
                elif userStr < curr.info[1]:
                    curr = curr.children[1]
                else:
                    curr = curr.children[2]

            #Four Node
            elif self.length(curr.info) is 3:
                for item in curr.info:
                    if item is not None:
                        if re.match(reg, str(item)) is not None:
                            return item
                if userStr < curr.info[0]:
                    curr = curr.children[0]
                elif userStr < curr.info[1]:
                    curr = curr.children[1]
                elif userStr < curr.info[2]:
                    curr = curr.children[2]
                else:
                    curr = curr.children[3]

    def inorder(self, node, retlst = None):
        if retlst is None:
            retlst = [] 
        if node.children[0]:
            retlst = self.inorder(node.children[0], retlst)
        retlst += [node.info[0]] 
        if node.children[1]:
            retlst = self.inorder(node.children[1], retlst)
        retlst += [node.info[1]]
        if node.children[2]:
            retlst = self.inorder(node.children[2], retlst)
        retlst += [node.info[2]]
        if node.children[3]:
            retlst = self.inorder(node.children[3], retst)
        return retlst

    ## Using Algorithm from: http://www.clear.rice.edu/comp212/01-fall/lectures/33/
    def insert(self, info, node):
        curr = node
        if self.length(curr.info) == 0: # curr is empty
            curr.info[0] = info
            return True

        elif self.length(curr.info) == 1: # curr is two node.
            if self.isLeaf(curr):
                curr.info[1] = info
                curr.info = self.sortNode(curr)
                return True
            else:
                if info < curr.info[0]:
                    self.insert(info, curr.children[0])
                else:
                    self.insert(info, curr.children[1])

        elif self.length(curr.info) == 2: # curr is 3 node
            if self.isLeaf(curr):
                curr.info[2] = info
                curr.info = self.sortNode(curr)
                return True
            else:
                if info < curr.info[0]:
                    self.insert(info, curr.children[0])
                elif info < curr.info[1]:
                    self.insert(info, curr.children[1])
                elif info > curr.info[2]:
                    self.insert(info, curr.children[2])

        elif self.length(curr.info) == 3: # curr is 4 node
            if self.isOrphan(curr): # curr has no parent
                curr = Node(curr.info[1],
                            Node(curr.info[0], curr.children[0], curr.children[1]),
                            Node(curr.info[2], curr.children[2], curr.children[3]))
                for child in curr.children:
                    if child is not None:
                        child.parent = curr
                self.root = curr
                self.insert(info, self.root)

            else:   #curr has a parent
                if self.length(curr.parent.info) == 1: # Parent is Two Node:
                    if curr.parent.children[0] == curr:# cur is lst.
                    #If P = [curr, M, p-rst], then P becomes [[lst, X, mlst], Y, [mrst, Z, rst], M, p-rst].
                        curr.parent.info[1] = curr.info[1]
                        curr.parent.info = self.sortNode(curr.parent)
                        curr.parent.children[2] = curr.parent.children[1]
                        curr.parent.children[1] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
                        curr.parent.children[0] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
                        self.insert(info, self.root)

                    elif curr.parent.children[1] == curr: # curr is rst.
                    #If P = [p-lst, M, curr], then P becomes [p-lst, M, [lst, X, mlst], Y, [mrst, Z, rst]].
                        curr.parent.info[1] = curr.info[1]
                        curr.parent.info = self.sortNode(curr.parent)
                        curr.parent.children[2] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
                        curr.parent.children[1] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
                        self.insert(info, self.root)

                elif self.length(curr.parent.info) == 2: # Parent is Three Node:

                    if curr.parent.children[0] == curr: # curr is lst
                    #If P = [curr, M1, p-mst, M2, p-rst], then P becomes [[lst, X, mlst], Y, [mrst, Z, rst], M1, p-mst, M2, p-rst].
                        curr.parent.info[2] = curr.info[1]
                        curr.parent.info = self.sortNode(curr.parent)
                        curr.parent.children[3] = curr.parent.children[2]
                        curr.parent.children[2] = curr.parent.children[1]
                        curr.parent.children[1] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
                        curr.parent.children[0] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
                        self.insert(info, self.root)

                    elif curr.parent.children[1] == curr: # curr is mst
                    #If P = [p-lst, M1, curr, M2, p-rst], then P becomes [p-lst, M1,[lst, X, mlst], Y, [mrst, Z, rst], M2, p-rst].
                        curr.parent.info[2] = curr.info[1]
                        curr.parent.info = self.sortNode(curr.parent)
                        curr.parent.children[3] = curr.parent.children[2]
                        curr.parent.children[2] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
                        curr.parent.children[1] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
                        self.insert(info, self.root)

                    elif curr.parent.children[2] == curr: # curr is rst
                    #If P = [p-lst, M1, p-mst, M2, curr], then P becomes [p-lst, M1, p-mst, M2, [lst, X, mlst], Y, [mrst, Z, rst]].
                        curr.parent.info[2] = curr.info[1]
                        curr.parent.info = self.sortNode(curr.parent)
                        curr.parent.children[3] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
                        curr.parent.children[2] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
                        self.insert(info, self.root)
4

1 回答 1

2

作为部分答案,您可以从消除代码中的大部分重复开始。例如,您不需要lookup()针对三种不同的节点类型对函数进行特殊处理。你可以改为这样写:

def lookup(self, s, r, node=None):
    if not node: node = self.root
    for item in node.info:
        if re.match(r, str(item)):
            return item
    for (i, item) in enumerate(curr.info):
        if s < item:
            return self.lookup(s, r, curr.children[i])
    return self.lookup(s, r, curr.children[-1]) # Assume len(children) == len(info) + 1

此外,遍历应该实现为生成器,并且再次使用循环而不是重复代码:

def inorder(self, node):
    for (i, item) in enumerate(node.info):
        if node.children[i]: self.inorder(node.children[i])
        yield item
    if node.children[-1]: self.inorder(node.children[-1])

这种清理首先会让其他人更容易诊断问题。其次,您可能会发现问题在此过程中消失了。

于 2013-02-24T02:26:53.637 回答