0

我正在尝试读取一个大文本文件(1.08 MB,116253 个字),我遇到了程序在进入文本文件的大约 1/20 处停止的问题(在Read函数中停止,甚至不执行print语句)。

这是我现在拥有的功能:

from binary_search_tree import BinarySearchTree
from tree_node import TreeNode
import sys

sys.setrecursionlimit(5000000)

MovieTree = BinarySearchTree()

def Read(filename):
    file = open('movieData.txt')
    for line in file:
        MovieTree[line.strip()] = line
        print(line)
    file.close()

Read('movieData.txt')
print(MovieTree)

给我的 binary_search_tree 和 tree_node 因为这是一个家庭作业,所以我假设它是有效的,因为它以前被使用过。

有谁知道问题是什么?

二叉搜索树:

from tree_node import TreeNode

class BinarySearchTree:

    def __init__(self):
        self.root = None
        self.size = 0

    def length(self):
        return self.size

    def __len__(self):
        return self.size

    def __iter__(self):
        return self.root.__iter__()

    def __str__(self):
        """Returns a string representation of the tree
           rotated 90 degrees counter-clockwise"""

        def strHelper(root, level):
            resultStr = ""
            if root:
                resultStr += strHelper(root.rightChild, level+1)
                resultStr += "| " * level
                resultStr += str(root.key) + "\n"
                resultStr += strHelper(root.leftChild, level+1)                
            return resultStr


        return strHelper(self.root, 0)


    def __contains__(self,key):
        if self._get(key,self.root):
            return True
        else:
            return False

    def get(self,key):
        if self.root:
            res = self._get(key,self.root)
            if res:
                return res.payload
            else:
                return None
        else:
            return None

    def _get(self,key,currentNode):
        if not currentNode:
            return None
        elif currentNode.key == key:
            return currentNode
        elif key < currentNode.key:
            return self._get(key,currentNode.leftChild)
        else:
            return self._get(key,currentNode.rightChild)

    def __getitem__(self,key):
        return self.get(key) 

    def __setitem__(self,k,v):
        self.put(k,v)

    def put(self,key,val):
        if self.root:
            self._put(key,val,self.root)
        else:
            self.root = TreeNode(key,val)
        self.size = self.size + 1

    def _put(self,key,val,currentNode):
        if key < currentNode.key:
            if currentNode.hasLeftChild():
                self._put(key,val,currentNode.leftChild)
            else:
                currentNode.leftChild = TreeNode(key,val,
                                          parent=currentNode)
        else:
            if currentNode.hasRightChild():
                self._put(key,val,currentNode.rightChild)
            else:
                currentNode.rightChild = TreeNode(key,val,
                                          parent=currentNode)

    def delete(self,key):
      if self.size > 1:
          nodeToRemove = self._get(key,self.root)
          if nodeToRemove:
              self.remove(nodeToRemove)
              self.size = self.size-1
          else:
              raise KeyError('Error, key not in tree')
      elif self.size == 1 and self.root.key == key:
          self.root = None
          self.size = self.size - 1
      else:
          raise KeyError('Error, key not in tree')

    def __delitem__(self,key):
        self.delete(key)


    def remove(self,currentNode):
      if currentNode.isLeaf(): #leaf
        if currentNode == currentNode.parent.leftChild:
            currentNode.parent.leftChild = None
        else:
            currentNode.parent.rightChild = None
      elif currentNode.hasBothChildren(): #interior
        succ = currentNode.findSuccessor()
        succ.spliceOut()
        currentNode.key = succ.key
        currentNode.payload = succ.payload

      else: # this node has one child
        if currentNode.hasLeftChild():
          if currentNode.isLeftChild():
              currentNode.leftChild.parent = currentNode.parent
              currentNode.parent.leftChild = currentNode.leftChild
          elif currentNode.isRightChild():
              currentNode.leftChild.parent = currentNode.parent
              currentNode.parent.rightChild = currentNode.leftChild
          else:
              currentNode.replaceNodeData(currentNode.leftChild.key,
                                 currentNode.leftChild.payload,
                                 currentNode.leftChild.leftChild,
                                 currentNode.leftChild.rightChild)

        else:
          if currentNode.isLeftChild():
              currentNode.rightChild.parent = currentNode.parent
              currentNode.parent.leftChild = currentNode.rightChild
          elif currentNode.isRightChild():
              currentNode.rightChild.parent = currentNode.parent
              currentNode.parent.rightChild = currentNode.rightChild
          else:
              currentNode.replaceNodeData(currentNode.rightChild.key,
                                 currentNode.rightChild.payload,
                                 currentNode.rightChild.leftChild,
                                 currentNode.rightChild.rightChild)

树节点:

class TreeNode:
   def __init__(self,key,val,left=None,right=None,
                                       parent=None):
      self.key = key
      self.payload = val
      self.leftChild = left
      self.rightChild = right
      self.parent = parent

   def hasLeftChild(self):
      return self.leftChild

   def hasRightChild(self):
      return self.rightChild

   def isLeftChild(self):
      return self.parent and \
             self.parent.leftChild == self

   def isRightChild(self):
      return self.parent and \
             self.parent.rightChild == self

   def isRoot(self):
      return not self.parent

   def isLeaf(self):
      return not (self.rightChild or self.leftChild)

   def hasAnyChildren(self):
      return self.rightChild or self.leftChild

   def hasBothChildren(self):
      return self.rightChild and self.leftChild

   def replaceNodeData(self,key,value,lc,rc):
      self.key = key
      self.payload = value
      self.leftChild = lc
      self.rightChild = rc
      if self.hasLeftChild():
          self.leftChild.parent = self
      if self.hasRightChild():
          self.rightChild.parent = self

   def __iter__(self):

      if self:
         if self.hasLeftChild():
              for elem in self.leftChild:
                 yield elem
         yield self.key
         if self.hasRightChild():
              for elem in self.rightChild:
                 yield elem

   def findSuccessor(self):
      succ = None
      if self.hasRightChild():
          succ = self.rightChild.findMin()
      else:
          if self.parent:
              if self.isLeftChild():
                  succ = self.parent
              else:
                  self.parent.rightChild = None
                  succ = self.parent.findSuccessor()
                  self.parent.rightChild = self
      return succ

   def findMin(self):
      current = self
      while current.hasLeftChild():
          current = current.leftChild
      return current

   def spliceOut(self):
      if self.isLeaf():
         if self.isLeftChild():
            self.parent.leftChild = None
         else:
            self.parent.rightChild = None
      elif self.hasAnyChildren():
         if self.hasLeftChild():
            if self.isLeftChild():
               self.parent.leftChild = self.leftChild
            else:
               self.parent.rightChild = self.leftChild
            self.leftChild.parent = self.parent
         else:
            if self.isLeftChild():
               self.parent.leftChild = self.rightChild
            else:
               self.parent.rightChild = self.rightChild
            self.rightChild.parent = self.parent
4

2 回答 2

1

堆栈溢出也会在这里崩溃。

由于您的代码不包含递归,因此它必须在老师给您的课程中。我认为溢出发生在put(self, key, val)方法中。

恐怕我几乎不知道 Python,所以我不能给你任何进一步的帮助。

于 2012-11-13T16:22:56.827 回答
1

1.08 MB 是一个小文件。为什么要使用 BinarySearchTree?您可以通过将数据转储到字典中轻松处理此问题。

如果 BinarySearchTree 是作业的一部分,在我看来它有错误。我将从它的 _put() 方法开始跟踪。

顺便说一句,您不应该使用sys.setrecursionlimit(5000000)来解决问题。除非您的数据大小在 2^5000000 的数量级,否则体面的二分搜索不会达到此限制。对于 116253 个单词,你有一个平衡的二叉树应该只需要 12 级递归。

于 2012-11-13T18:39:09.593 回答