请帮我检查下面的代码。我正在尝试找出下面这段程序中的内存泄漏。
`run_before` 和 `run_after` 是两个辅助函数,用于在函数调用之前和之后统计字典(dict)对象的数量。
我对每个日期调用一次 processDate,并在每次调用之后检查字典的数量。字典总数不断增加,每次大约增加 10,000 个。函数里只创建了局部字典。另外有一个全局字典,每次调用都会被更新,但它并不会新增字典对象,至少不会新增 10,000 个。我不知道字典的数量是在哪里增加的。会不会是 pandas DataFrame 切片内部创建的?
编辑:根据要求补充了完整的类。它依赖于其他类。我很确定其他类没有这种内存泄漏,因为泄漏是在我引入下面这个类之后才开始出现的。
import math,logging,time
import numpy as np
import alphaCalculator,regConfig,pdb,pandas,time
import regressor
from basics import *
from matplotlib import pyplot as plt
import gc
from collections import defaultdict
from gc import get_objects
class AlphaAnalyser(alphaCalculator.AlphaCalculator):
    """Collect per-date, per-stock, per-threshold statistics on alpha values.

    For every processed date and stock the alpha and objective series are
    computed through the parent class, bucketed by alpha threshold, and
    summarised into one-row DataFrames that are stored in ``rowResults`` and
    finally appended to ``results``.

    The attributes ``index``, ``tickData``, ``isValid``, ``regInfo``,
    ``dateFrom`` and ``dateTo`` as well as ``computeAlpha`` / ``computeObj``
    are not defined here; presumably they come from
    ``alphaCalculator.AlphaCalculator`` -- verify against the parent class.
    """

    def __init__(self, regInfo, dateFrom=None, dateTo=None):
        """Set up thresholds, lags and the empty result containers.

        :param regInfo: configuration object; ``stockListReg`` is read here.
        :param dateFrom: forwarded unchanged to the parent constructor.
        :param dateTo: forwarded unchanged to the parent constructor.
        """
        alphaCalculator.AlphaCalculator.__init__(self, regInfo, dateFrom, dateTo)
        self.stockList = regInfo.stockListReg  # stocks to compute the statistics on
        # Alpha thresholds; each one is multiplied by the spread before use.
        self.thresholdList = [-1, 0, 0.1, 0.3, 0.5, 0.7]
        self.lagList = [100, 600, 1800, 5000]  # in sec

        # Column layout of the result frame: fixed statistics followed by one
        # "realised_<lag>" column per lag.
        self.fields = ["nbPaperTrades", "alpha", "obj", "obj2", "objalpha", "objalpha2"]
        self.fields += ["realised_" + str(lag) for lag in self.lagList]
        self.results = pandas.DataFrame(columns=["date", "ric", "threshold"] + self.fields)
        # NOTE(review): set_index returns a new frame and the return value is
        # discarded, so this call currently has no effect on self.results.
        # Left as-is because assigning it would change the index of the final
        # result -- decide explicitly whether the index is wanted.
        self.results.set_index(["date", "ric", "threshold"], drop=False)

        # Created here as well so processDate can be called on its own
        # without going through analyseResult first.
        self.rowResults = []
        self.before = defaultdict(int)
        self.after = defaultdict(int)

    def resetInfo(self, regInfo):
        """Replace the configuration object and refresh the stock list from it."""
        self.regInfo = regInfo
        self.stockList = regInfo.stockListReg

    def getDtAlpha(self, stock, date):
        """Return the alpha data for ``stock`` on ``date``.

        Returns an empty tuple when ``stock`` is not found in the reference
        data of that day.
        """
        self.tickData.setDate(date)
        if stock not in self.index.refData.ix[date.date().isoformat()].ric:
            return ()
        return self.computeAlpha(product=stock, date=date)

    def _countLiveObjects(self):
        """Return a mapping type -> number of objects tracked by the collector.

        A collection is forced first so that garbage which is merely waiting
        to be collected is not counted.
        """
        gc.collect()
        counts = defaultdict(int)
        for obj in get_objects():
            counts[type(obj)] += 1
        return counts

    def run_before(self):
        """Record the per-type object counts before the measured call."""
        # Collect before counting (as run_after does) so both snapshots are
        # taken against the same baseline.
        self.before = self._countLiveObjects()

    def run_after(self):
        """Record the per-type object counts after the call and print the dict delta."""
        self.after = self._countLiveObjects()
        nbAfter = self.after[type({})]
        nbBefore = self.before[type({})]
        print("Objects which are not garbage collected: ->")
        print("Dict count diff (" + str(nbAfter) + "+" + str(nbBefore) + "): " + str(nbAfter - nbBefore))

    def processDate(self, date):
        """Compute the statistics of every stock for ``date``.

        For each stock that is present in the reference data and for which
        the alpha and objective computations are valid, one single-row
        DataFrame per threshold is built and the list of these frames is
        appended to ``self.rowResults``.

        :param date: object exposing ``date().isoformat()``; presumably a
            ``datetime`` -- confirm against the date generator.
        """
        dayKey = date.date().isoformat()
        if self.regInfo.regType == regConfig.RegType.INDEX_REG or self.stockList == []:
            stockList = self.index.refData.ix[dayKey].ric.tolist()
        else:
            stockList = self.stockList
        self.tickData.setDate(date)

        # Looked up once per date instead of once per stock.
        dayRics = self.index.refData.ix[dayKey].ric
        lastIdx = len(self.thresholdList) - 1

        for stock in stockList:
            if stock not in dayRics:
                continue
            print(stock)
            alphaData = self.computeAlpha(stock, date)
            if not self.isValid:
                continue
            objData = self.computeObj(stock, date)
            if not self.isValid:
                continue

            # Keep only the samples where both series are defined.
            nanfilter = ~(np.isnan(alphaData["VALUES"]) | np.isnan(objData["VALUES"]))
            alphaData = alphaData["VALUES"][nanfilter]
            objData = objData["VALUES"][nanfilter]
            spread = self.index.refData.ix[dayKey, stock]["spread"]
            absAlpha = abs(alphaData)

            paperTrades = {}  # threshold -> boolean selection of samples
            dfs = []          # one single-row frame per threshold, same order
            for i, t in enumerate(self.thresholdList):
                if t < 0:
                    # Negative threshold: select every sample.
                    selected = np.repeat(True, len(alphaData))
                elif i == lastIdx:
                    # Last bucket has no upper bound.
                    selected = absAlpha >= t * spread
                else:
                    selected = (absAlpha >= t * spread) & (absAlpha < self.thresholdList[i + 1] * spread)
                paperTrades[t] = selected

                objSel = objData[selected]
                residual = objSel - alphaData[selected]
                # nansum is not defined in this file; presumably provided by
                # `from basics import *`.
                row = [
                    date,
                    stock,
                    t,
                    nansum(selected),
                    np.sum(absAlpha[selected]),
                    np.sum(objSel),
                    np.sum(np.square(objSel)),
                    np.sum(residual),
                    np.sum(np.square(residual)),
                ]
                # The realised_<lag> columns start at 0.0 and are filled below.
                dfs.append(pandas.DataFrame([row + [0.0] * len(self.lagList)], columns=self.results.columns))

            for lag in self.lagList:
                objData = self.computeObj(stock, date, delay=lag)
                if not self.isValid:
                    break
                objData = objData["VALUES"][nanfilter]
                column = "realised_" + str(lag)
                for frame, t in zip(dfs, self.thresholdList):
                    selected = paperTrades[t]
                    frame[column] = nansum(np.sign(alphaData[selected]) * objData[selected])

            # NOTE(review): every one-row DataFrame kept here stays alive until
            # the final result is built; each frame likely owns several
            # internal dicts, which would explain a dict count that grows with
            # stocks x thresholds per date. Storing plain rows and building a
            # single DataFrame at the end would avoid that, at the cost of a
            # different rowResults layout and result index.
            self.rowResults.append(dfs)
        # No explicit `del` of the locals: they are released when the function
        # returns, and deleting names that were never bound (no valid stock
        # for the date) raised UnboundLocalError.

    def analyseResult(self):
        """Process every date of the configured range and build the final result."""
        dateGen = regressor.DateGenerator(self.regInfo.indexName, self.dateFrom, self.dateTo)
        self.rowResults = []
        for date in dateGen.getDates():
            try:
                self.run_before()
                self.processDate(date)  # check memory
                self.run_after()
            except Exception:
                # Best effort per date: keep going, but log the traceback so
                # the actual failure is not lost.
                logging.exception("Analyser can't process date " + date.date().isoformat())
        self.processFinalResult()

    def processFinalResult(self):
        """Append all accumulated one-row frames to ``self.results``."""
        # rowResults holds one list of frames per stock; DataFrame.append
        # expects a flat list of frames.
        frames = [frame for stockFrames in self.rowResults for frame in stockFrames]
        if frames:
            self.results = self.results.append(frames)