我的算法已经运行了好几个小时,运行结束后需要把变量保存到文件中。但在写入文件时,程序因内存异常(MemoryError)而崩溃了……算法本身运行正常,变量也已经得到了我需要的值。在这种情况下,还有办法取回这个变量吗?顺便说一句,我是 Python 新手,我也知道这个问题问得有点奇怪。
# -*- coding: utf-8 -*-
# Script setup (Python 2): configure file names, load the pickled feature
# collection and the tweet data, and pull in the helper functions.
#
# NOTE: the imports are deliberately left in their original positions. Several
# of them are star imports, so their relative order decides which module's
# name wins. In particular `from numpy import *` runs after
# `from scipy.sparse import *`, so from that point on a bare `vstack` is
# numpy's, not scipy.sparse's.
from scipy import *

feature_filename = '4087_features.pkl'
name = 'training'
minimum_features = 3
feature_output = '4087_features_min' + str(minimum_features) + '_' + name + '.txt'
print(feature_output)

# load necessary files for this step
print('loading features')
import pickle
# SECURITY: unpickling executes code embedded in the file; only load trusted files.
# The context manager closes the handle even if unpickling fails.
with open(feature_filename, 'rb') as feature_file:
    features = pickle.load(feature_file)
from scipy.sparse import *
# Map every feature key to a consecutive column index, in iteration order.
features = {key: column for column, key in enumerate(features)}
print(feature_filename + ' loaded')

filename_in = '../../../Dropbox/Machinaal_leren/project/project/Emotion_Data_twitter/tweets_' + name + '.mat'
print('loading ' + filename_in + '...')
import scipy.io
from numpy import *
# Only read the .mat file when `data` and `tweets` are not already defined
# (presumably so a re-run inside a live interpreter session skips the reload).
try:
    data
    tweets
except NameError:
    data = scipy.io.loadmat(filename_in)
    tweets = data['tweets_' + name].squeeze()
print('tweets_' + name + ' loaded')

# Executes functions.py in this module's namespace; create_feature_vector
# relies on a `clean_tweet` that is not defined in this file
# (presumably it comes from functions.py - confirm).
execfile('functions.py')
import numpy as np
from multiprocessing import Pool
t = 0
def create_feature_vector(tweet, ground_truth):
    """Build a normalised n-gram count vector for a single tweet.

    The tweet is passed through ``clean_tweet``. For every N in ``range(3)``
    each run of N consecutive items is turned into a key by appending one
    space to every item and concatenating them. Keys present in the global
    ``features`` mapping increment the column that mapping assigns to them;
    unknown keys are ignored.

    Args:
        tweet: raw tweet, in whatever form ``clean_tweet`` accepts.
        ground_truth: label returned unchanged alongside the vector.

    Returns:
        ``(feature_row, ground_truth)`` where ``feature_row`` is a list of
        the counts divided by their total, when at least
        ``minimum_features`` columns are non-zero; otherwise the sentinel
        pair ``(9, 9)``.
    """
    feature_row = np.array([0] * len(features))
    tweet = clean_tweet(tweet)
    # N-grams
    # NOTE(review): N == 0 produces the empty key ''; presumably
    # range(1, 3) was intended - confirm before changing.
    for N in range(3):
        for j in range(0, len(tweet) - (N - 1)):
            try:
                key = ''.join(tweet[j + m] + ' ' for m in range(N))
                feature_row[features[key]] += 1
            except (ValueError, IndexError, KeyError):
                # Not a known feature (or out of range): skip it.
                pass
    count_features = (feature_row != 0).sum(0)
    if count_features < minimum_features:
        return (9, 9)
    # Compute the total once instead of once per element.
    total = 1. * feature_row.sum()
    return ([x / total for x in feature_row], ground_truth)
# Build the sparse feature matrix and the label array for all tweets, then
# pickle both to disk.
#
# The sparse helpers are imported explicitly under unambiguous names: the
# file's star imports run `from numpy import *` after
# `from scipy.sparse import *`, so a bare `vstack` here would be numpy's,
# which is not meant for stacking sparse matrices.
from scipy.sparse import coo_matrix
from scipy.sparse import vstack as sparse_vstack

emotions = ['emo_joy', 'emo_fear', 'emo_sadness', 'emo_thankfulness', 'emo_anger', 'emo_surprise', 'emo_love']
N_emo = len(emotions)
ground_truth_list = []
feature_rows = []
for i, tweet_record in enumerate(tweets):
    # Labels are 1-based positions in `emotions`.
    feature_vector, ground_truth = create_feature_vector(tweet_record[0], emotions.index(tweet_record[1]) + 1)
    print(i)
    # (9, 9) is the sentinel create_feature_vector returns for tweets with
    # too few features; those are skipped.
    if feature_vector == 9 or ground_truth == 9:
        continue
    # Every tweet, including the first, is handled the same way so that the
    # matrix rows and the labels stay aligned one-to-one.
    ground_truth_list.append(ground_truth)
    feature_rows.append(coo_matrix(feature_vector))

# Stack once at the end instead of rebuilding the matrix on every iteration.
if feature_rows:
    feature_vector_matrix = sparse_vstack(feature_rows, format='coo')
else:
    feature_vector_matrix = coo_matrix((0, len(features)))

print('Calculated the matrix, ground truth and saving files')
ground_truth_array = np.array(ground_truth_list)
# The binary pickle protocol is far more compact than the Python 2 default
# (ASCII protocol 0); pickle.load detects the protocol automatically.
with open('ground_truth.pkl', 'wb') as output:
    pickle.dump(ground_truth_array, output, pickle.HIGHEST_PROTOCOL)
with open('feature_matrix.pkl', 'wb') as output2:
    pickle.dump(feature_vector_matrix, output2, pickle.HIGHEST_PROTOCOL)
程序在这一行之后崩溃了:
print 'Calculated the matrix, ground truth and saving files'
输出:
Calculated the matrix, ground truth and saving files
Traceback (most recent call last):
File "C:\Users\Olivier.Janssens\Documents\Aptana Studio 3 Workspace\MachineLearningBNB\generate_feature_vectors.py", line 99, in <module>
pickle.dump(feature_vector_matrix, output2)
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 1370, in dump
Pickler(file, protocol).dump(obj)
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 224, in dump
self.save(obj)
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 331, in save
self.save_reduce(obj=obj, *rv)
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 419, in save_reduce
save(state)
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 562, in save_tuple
save(element)
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 600, in save_list
self._batch_appends(iter(obj))
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 615, in _batch_appends
save(x)
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 331, in save
self.save_reduce(obj=obj, *rv)
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 419, in save_reduce
save(state)
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 649, in save_dict
self._batch_setitems(obj.iteritems())
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 663, in _batch_setitems
save(v)
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 331, in save
self.save_reduce(obj=obj, *rv)
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 419, in save_reduce
save(state)
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 562, in save_tuple
save(element)
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 581, in save_tuple
self.memoize(obj)
File "C:\Program Files (x86)\python2.7\lib\pickle.py", line 247, in memoize
self.memo[id(obj)] = memo_len, obj
MemoryError
第 99 行是:
pickle.dump(feature_vector_matrix, output2)
ground_truth.pkl 已经生成,而且看起来是完整的。