0

我的算法已经运行了几个小时,结束时需要把变量保存到文件中。但它在写入文件时因内存异常(MemoryError)而崩溃了……不过算法本身运行正常,变量里也已经是我需要的值。此时还有办法取回这个变量吗?顺便说一句,我是 Python 新手,我也知道这个问题问得有点奇怪。

# -*- coding: utf-8 -*-
# Setup for the tweet feature-vector generation script (Python 2):
# configuration, loading of the pickled feature list and of the tweets, and
# execution of the helper definitions in functions.py.
#
# NOTE: the star imports below are kept in their original relative order on
# purpose.  scipy.sparse and numpy both export names such as ``vstack`` and
# ``hstack``; because numpy is star-imported last, the bare names resolve to
# the numpy versions.  Import the sparse variants explicitly where needed.
from scipy import *
import pickle
from scipy.sparse import *
import scipy.io
from numpy  import *
import numpy as np
from multiprocessing import Pool

# ---- configuration ----
feature_filename = '4087_features.pkl'
name = 'training'
minimum_features = 3
feature_output = '4087_features_min' + str(minimum_features) + '_' + name + '.txt'

print(feature_output)

# load necessary files for this step

print('loading features')
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
# The ``with`` block closes the handle, which the original left open.
with open(feature_filename, 'rb') as feature_file:
    features = pickle.load(feature_file)
# Replace the loaded container by a mapping from each feature key to a
# column index, numbered in the container's iteration order.
features = {key: column for column, key in enumerate(features)}
print(feature_filename + ' loaded')

filename_in = '../../../Dropbox/Machinaal_leren/project/project/Emotion_Data_twitter/tweets_' + name + '.mat'
print('loading ' + filename_in + '...')

# Only read the .mat file when ``data``/``tweets`` are not already defined
# (presumably so that re-running inside the same interactive session skips
# the load -- confirm against how the script is launched).
try:
    data
    tweets
except NameError:
    data = scipy.io.loadmat(filename_in)
    tweets = data['tweets_' + name].squeeze()
print('tweets_' + name + ' loaded')

# Pulls the definitions of functions.py into this namespace (Python 2 only).
execfile('functions.py')


t = 0


def create_feature_vector(tweet, ground_truth):
    """Build the normalised n-gram count vector of one tweet.

    The tweet is passed through ``clean_tweet``.  For every window of
    N = 0, 1 and 2 consecutive items of the result, a key is formed by
    appending a single space to each item and concatenating them; the key is
    looked up in the module-level ``features`` mapping and the matching
    column is incremented.  Keys that are not in ``features`` are skipped.

    Args:
        tweet: raw tweet, in whatever form ``clean_tweet`` accepts.
        ground_truth: label of the tweet; returned unchanged.

    Returns:
        ``(feature_row, ground_truth)`` when at least ``minimum_features``
        distinct features were found, where ``feature_row`` is a list with
        ``len(features)`` entries holding each count divided by the total
        count.  Otherwise the sentinel pair ``(9, 9)``.
    """
    feature_row = np.zeros(len(features), dtype=int)
    tokens = clean_tweet(tweet)
    # N-grams
    # NOTE(review): range(3) yields N = 0, 1, 2, so the empty key '' is also
    # looked up and trigrams are not; confirm this is intended.
    for N in range(3):
        for j in range(0, len(tokens) - (N - 1)):
            # Key construction stays inside the try block: in Python 2 mixing
            # str and unicode items can raise UnicodeDecodeError, which is a
            # ValueError and was silently skipped by the original code too.
            try:
                key = ''.join(tokens[j + m] + ' ' for m in range(N))
                feature_row[features[key]] += 1
            except (ValueError, IndexError, KeyError):
                pass
    count_features = (feature_row != 0).sum(0)
    if count_features < minimum_features:
        return (9, 9)
    # Compute the total once; the original recomputed it for every element,
    # making the normalisation quadratic in the number of features.
    total = 1. * feature_row.sum()
    return (list(feature_row / total), ground_truth)

# Build the sparse feature matrix (one row per retained tweet) together with
# the matching ground-truth labels, then pickle both to disk.
#
# The sparse helpers are imported explicitly: after the star imports at the
# top of the file the bare name ``vstack`` refers to numpy.vstack, which
# wraps sparse matrices into an object array instead of stacking them.
from scipy.sparse import coo_matrix, vstack as sparse_vstack

emotions = ['emo_joy', 'emo_fear', 'emo_sadness', 'emo_thankfulness', 'emo_anger', 'emo_surprise', 'emo_love']
N_emo = len(emotions)
ground_truth_list = []
feature_rows = []  # one 1 x len(features) sparse row per retained tweet

for i, tweet in enumerate(tweets):
    # Labels are the 1-based position of the tweet's emotion in ``emotions``.
    feature_vector, ground_truth = create_feature_vector(tweet[0], emotions.index(tweet[1]) + 1)
    print(i)
    # (9, 9) is the sentinel create_feature_vector returns for tweets with
    # too few features.  Every tweet, including the first one, goes through
    # the same check so that rows and labels stay aligned.
    if (feature_vector != 9) and (ground_truth != 9):
        ground_truth_list.append(ground_truth)
        feature_rows.append(coo_matrix(feature_vector))

# Stack once at the end instead of re-stacking the whole matrix per tweet.
if feature_rows:
    feature_vector_matrix = sparse_vstack(feature_rows)
else:
    # No tweet passed the filter: keep a well-formed, empty matrix.
    feature_vector_matrix = coo_matrix((0, len(features)))
del feature_rows  # release the per-row objects before serialising

print('Calculated the matrix, ground truth and saving files')


ground_truth_array = np.array(ground_truth_list)

# The binary pickle protocol stores the underlying arrays compactly; files
# written this way are still read back with a plain pickle.load().
with open('ground_truth.pkl', 'wb') as output:
    pickle.dump(ground_truth_array, output, pickle.HIGHEST_PROTOCOL)

with open('feature_matrix.pkl', 'wb') as output2:
    pickle.dump(feature_vector_matrix, output2, pickle.HIGHEST_PROTOCOL)

它在这一行之后崩溃了:

print 'Calculated the matrix, ground truth and saving files'

输出

Calculated the matrix, ground truth and saving files
Traceback (most recent call last):
  File "C:\Users\Olivier.Janssens\Documents\Aptana Studio 3 Workspace\MachineLearningBNB\generate_feature_vectors.py", line 99, in <module>
    pickle.dump(feature_vector_matrix, output2)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 1370, in dump
    Pickler(file, protocol).dump(obj)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 224, in dump
    self.save(obj)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 331, in save
    self.save_reduce(obj=obj, *rv)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 419, in save_reduce
    save(state)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 562, in save_tuple
    save(element)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 600, in save_list
    self._batch_appends(iter(obj))
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 615, in _batch_appends
    save(x)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 331, in save
    self.save_reduce(obj=obj, *rv)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 419, in save_reduce
    save(state)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 649, in save_dict
    self._batch_setitems(obj.iteritems())
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 663, in _batch_setitems
    save(v)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 331, in save
    self.save_reduce(obj=obj, *rv)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 419, in save_reduce
    save(state)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 562, in save_tuple
    save(element)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 581, in save_tuple
    self.memoize(obj)
  File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 247, in memoize
    self.memo[id(obj)] = memo_len, obj
MemoryError

第 99 行是:

pickle.dump(feature_vector_matrix, output2)

我有 ground_truth.pkl,它看起来确实很完整

4

3 回答 3

0

使用 except MemoryError 捕获该异常。

这样,即使发生内存异常,您仍然可以在异常处理代码中保存或输出变量的值。

用法如下:

# Template: wrap the failing statement so the values can still be saved or
# printed when a MemoryError is raised.
try:
    pass  # your code where you get the error
except MemoryError:
    pass  # save or print your values here

但是,如果这种崩溃更频繁地发生,您可能需要优化您的代码。寻找非常大的处理步骤。有时它有助于将过程拆分为更小的步骤或首先存储信息。

于 2012-12-07T12:51:55.333 回答
0

好消息:从您的代码来看,出错的位置并不在任何嵌套函数或类似结构之中。只要您保持 Python 窗口打开,就应该仍然可以像代码中那样访问这些变量。换句话说,只需在 Python 提示符下运行下面这段代码。

# Re-run the save step from the still-open interpreter session.  The ``with``
# blocks close the files even if dumping fails, and the binary pickle
# protocol writes array data far more compactly than the default ASCII one.
ground_truth_array = np.array(ground_truth_list)

with open('ground_truth.pkl', 'wb') as output:
    pickle.dump(ground_truth_array, output, pickle.HIGHEST_PROTOCOL)

with open('feature_matrix.pkl', 'wb') as output2:
    pickle.dump(feature_vector_matrix, output2, pickle.HIGHEST_PROTOCOL)

如果 Python 提示符已经不在了(解释器已经退出),那您基本上就没有办法了,只能重新运行并计算数据。请记住这次教训:以后要在运行过程中及时保存变量,或者先用较小的数据子集测试功能,以免一次崩溃让您前功尽弃。

于 2012-12-07T12:56:49.810 回答
0

当代码尝试从 ground_truth_list(我认为它非常大)创建一个 numpy 数组时,程序崩溃了。我的建议是在创建数组之前先将列表保存到磁盘。这样,无论是否还有 Python 提示符,您都始终能够读回列表的值。

更新

如果无法序列化(pickle)的对象是一个矩阵(顾名思义),一个可行的办法是把它分成若干切片(或者一开始就只创建切片而不是整个矩阵),然后把每个切片分别 pickle 到磁盘。之后需要使用该矩阵时,再加载各个切片并重新拼接,恢复出原始矩阵。这也许不是最高效的方案,但我认为应该可行。

于 2012-12-07T13:03:04.803 回答