python的垃圾回收问题_Python垃圾回收器有问题吗?

我有一个简单的程序,它读取一个包含几百万行的大文件,解析每一行(numpy 数组),将其转换成双精度数组(Python array),然后写入一个 HDF5 文件。我对多天的数据重复这个循环。每读完一个文件,我都会删除所有对象并调用垃圾回收器。运行程序时,第一天的数据解析没有任何错误,但到第二天就出现了 MemoryError。我监控了程序的内存使用情况:在解析第一天的数据期间,内存使用量大约是 1.5GB;第一天解析完成后,内存使用量下降到 50MB。而当第二天的解析开始、我尝试从文件中读取各行时,就得到了 MemoryError。以下是程序的输出。

source file extracted at C:\rfadump\au\2012.08.07.txt

parsing started

current time: 2012-09-16 22:40:16.829000

500000 lines parsed

1000000 lines parsed

1500000 lines parsed

2000000 lines parsed

2500000 lines parsed

3000000 lines parsed

3500000 lines parsed

4000000 lines parsed

4500000 lines parsed

5000000 lines parsed

parsing done.

end time is 2012-09-16 23:34:19.931000

total time elapsed 0:54:03.102000

repacking file

done

> s:\users\aaj\projects\pythonhf\rfadumptohdf.py(132)generateFiles()

-> while single_date <= self.end_date:

(Pdb) c

*** 2012-08-08 ***

source file extracted at C:\rfadump\au\2012.08.08.txt

cought an exception while generating file for day 2012-08-08.

Traceback (most recent call last):

File "rfaDumpToHDF.py", line 175, in generateFile

lines = self.rawfile.read().split('|\n')

MemoryError

我很确定 Windows 任务管理器显示该进程的内存使用量为 50MB。看起来 Python 的垃圾回收器或内存管理器没有正确地计算空闲内存:本应有大量空闲内存,但它却认为内存不足。

有什么想法吗?

编辑

在此处添加我的代码

我会贴出我的部分代码。我是 Python 新手,请原谅我的 Python 编码风格。

模块1

^{pr2}$

模块2 - taqdb - 将解析后的数据存储在数组中

class taqDB:

def __init__(self, index, offset):
    """Set up an empty store for one index.

    index  -- passed to config.hdf5.getTickConfig() to obtain the tick
              configuration kept in self.tickcfg.
    offset -- kept as-is in self.offset; insert() adds it to every
              timestamp it computes.
    """
    # Per-RIC dataset dictionaries, filled lazily by getGroup().
    self.groups = {}
    self.offset = offset
    self.index = index
    self.tickcfg = config.hdf5.getTickConfig(index)

def getGroup(self, ric):
    """Return the dataset dictionary for ``ric``.

    An empty dictionary is created and registered in self.groups the first
    time a given ``ric`` is requested; later calls return that same object.
    """
    # dict.has_key() no longer exists in Python 3; setdefault() performs the
    # lookup-or-create in a single step and works on Python 2 as well.
    return self.groups.setdefault(ric, {})

def getOrderbookArray(self, ric, group):
    """Return the order-book array the next update for ``ric`` starts from.

    ric   -- instrument code; its product type is looked up in self.tickcfg.
    group -- dataset dictionary for this ric (see getGroup()).

    Returns:
      * None when the product type is ProdType.INDEX;
      * the array supplied by self.tickcfg.getOrderBookArray() when no
        order-book data has been stored for this group yet (an empty
        array.array("d") is registered in ``group`` on first use);
      * otherwise a one-row numpy array built from the last
        ``orderbookArrayShape[1]`` values already stored.
    """
    prodtype = self.tickcfg.getProdType(ric)
    if prodtype == ProdType.INDEX:
        # Explicit None: callers must check before indexing the result.
        return None

    orderbookArrayShape = self.tickcfg.getOrderBookArrayShape(prodtype)

    # "in" replaces dict.has_key(), which was removed in Python 3.
    if orderBookName not in group:
        group[orderBookName] = array.array("d")
        return self.tickcfg.getOrderBookArray(prodtype)

    stored = group[orderBookName]
    if len(stored) == 0:
        return self.tickcfg.getOrderBookArray(prodtype)

    # The flat buffer holds appended rows back to back; the slice picks the
    # most recently appended values, wrapped so the result is 2-D.
    lastOrderbook = stored[-orderbookArrayShape[1]:]
    return np.array([lastOrderbook])

def addToDataset(self, group, datasetname, timestamp, arr):
    """Stamp ``arr`` with ``timestamp`` and append its first row to a dataset.

    group       -- dictionary mapping dataset names to array.array("d") buffers.
    datasetname -- key of the buffer to append to; created empty if missing.
    timestamp   -- value written into arr[0, 0] before appending.
    arr         -- 2-D indexable array; NOTE it is modified in place
                   (arr[0, 0] is overwritten) and only arr[0] is appended.
    """
    # "in" replaces dict.has_key(), which was removed in Python 3.
    if datasetname not in group:
        group[datasetname] = array.array("d")

    arr[0, 0] = timestamp
    group[datasetname].extend(arr[0])

def addToOrderBook(self, group, timestamp, arr):
    """Append the first row of ``arr`` to the order-book dataset of ``group``.

    Thin wrapper around addToDataset() using the module-level
    ``orderBookName`` as the dataset key.
    """
    # The bound method already supplies self; passing it again shifted every
    # argument by one and raised TypeError (too many arguments).
    self.addToDataset(group, orderBookName, timestamp, arr)

def insert(self, data):
    """Store one parsed record in the in-memory datasets.

    data -- 6-item sequence
            (ric, timestamp, quotes, trades, levelsUpdated, tradeupdate).
            ``timestamp`` must expose hour/minute/second/microsecond;
            ``quotes`` must support .nonzero() and fancy indexing;
            ``levelsUpdated`` is unpacked but not used here.

    The stored time is seconds since midnight with millisecond resolution,
    plus self.offset.
    """
    ric, timestamp, quotes, trades, levelsUpdated, tradeupdate = data

    # The fraction used to be glued on as an unpadded string, so 5 ms became
    # ".5" (half a second). Zero-padding to three digits keeps the same
    # millisecond truncation but yields the right value.
    seconds = timestamp.hour * 3600 + timestamp.minute * 60 + timestamp.second
    millis = timestamp.microsecond // 1000
    timestamp = float("%d.%03d" % (seconds, millis)) + self.offset

    ## write to array
    group = self.getGroup(ric)

    orderbookArray = self.getOrderbookArray(ric, group)
    # getOrderbookArray() returns None for ProdType.INDEX; indexing None
    # would raise TypeError, so the order-book step is skipped in that case.
    if orderbookArray is not None:
        nonzero = quotes.nonzero()
        orderbookArray[nonzero] = quotes[nonzero]
        # nonzero is a tuple of index arrays. Test whether any index exists,
        # not whether any index value is non-zero (np.any(nonzero) missed a
        # lone non-zero entry at position [0, 0]).
        if nonzero[0].size:
            self.addToDataset(group, orderBookName, timestamp, orderbookArray)

    if tradeupdate:
        self.addToDataset(group, tradeName, timestamp, trades)

模块3 - 解析器

class rfaTextToTAQ:

"""RFA raw dump file reader. Reads a single line (record) and returns an array or array of fid value pairs."""

def __init__(self, tickconfig):
    """Create a parser bound to ``tickconfig``.

    tickconfig -- configuration object queried by the other methods for
                  field tables, product types and subscriptions.
    """
    self.tickconfig = tickconfig

    # State-machine bookkeeping; parseline() resets these for every line.
    self.token = ''
    self.state = ReadState.SEQ_NUM
    self.fvstate = fvstate.FID

    # Fields of the record being parsed. They used to be created only when
    # parseline() reached the matching state, so reading them earlier (for
    # example self.value in the FID state, or the final return on a short
    # line) raised AttributeError.
    self.seq_num = None
    self.timestamp = None
    self.ric = ''
    self.update_type = ''
    self.fid = ''
    self.value = ''

    self.quotes = np.array([]) # read from tickconfig
    self.trades = np.array([]) # read from tickconfig
    self.prodtype = ProdType.STOCK

    # Per-RIC state, populated lazily by getOrderBookTrade().
    self.allquotes = {}
    self.alltrades = {}

    self.acvol = 0
    self.levelsUpdated = []
    self.quoteUpdate = False
    self.tradeUpdate = False
    self.depth = 0

def updateLevel(self, index):
    """Record ``index`` in self.levelsUpdated unless it is already listed."""
    # "not in" is the idiomatic spelling of __contains__(...) == False.
    if index not in self.levelsUpdated:
        self.levelsUpdated.append(index)

def updateQuote(self, fidindex, field):
    """Write the current value into self.quotes at ``fidindex``.

    fidindex -- index tuple as produced by np.where() on a quotes table.
    field    -- accepted for signature symmetry with updateTrade(); unused.

    Side effects: self.value is replaced by its float conversion, the touched
    row is reported through updateLevel(), and self.quoteUpdate is set.
    """
    numeric = float(self.value)
    self.value = numeric

    if self.depth != 1:
        self.quotes[fidindex] = numeric
        level = fidindex[0][0]
    else:
        # NOTE(review): with depth == 1 the offset term below is always zero,
        # and no offset is applied for any other depth -- confirm whether the
        # condition is inverted.
        rows = fidindex[0] + (len(self.tickconfig.stkQuotes) * (self.depth - 1))
        self.quotes[rows[0]][fidindex[1][0]] = numeric
        level = rows[0]

    self.updateLevel(level)
    self.quoteUpdate = True

def updateTrade(self, fidindex, field):
    """Apply the current field value to self.trades.

    fidindex -- index tuple as produced by np.where() on a trades table.
    field    -- field identifier; compared with the module-level ``acvol``.

    For the accumulated-volume field the stored value is the increase over
    self.acvol, and it is written only when the volume grew. For any other
    field the value is stored directly. self.tradeUpdate is set once the
    conditions checked below are met.
    """
    # Exact comparison kept: the helper's return type is not visible here.
    if self.tickconfig.tradeUpdate(self.depth) == False:
        return

    newacvol = float(self.value)

    if field == acvol:
        # Compare numbers with numbers. The old test used the raw string
        # self.value, which is always "greater" than a number on Python 2
        # and raises TypeError on Python 3.
        if newacvol > self.acvol:
            tradesize = newacvol - self.acvol
            self.acvol = newacvol
            self.trades[fidindex] = tradesize
            if 0 not in self.trades:
                self.tradeUpdate = True
            # NOTE(review): self.acvol is reloaded from self.alltrades[ric][1]
            # by parseline(), but nothing visible here writes the new total
            # back there -- confirm whether it should be persisted.
    else:
        self.trades[fidindex] = self.value
        if not (self.trades[0, 1] == 0 or self.trades[0, 2] == 0):
            self.tradeUpdate = True

def updateResult(self):
    """Route the current fid/value pair to updateQuote() or updateTrade().

    The fid is translated with field_dict.FIDToField(); unknown fids and the
    literal value '0' are ignored. The field is looked up first in the quotes
    table for the current product type (stock tables for ProdType.STOCK,
    futures tables otherwise) and, only if absent there, in the trades table.
    Fields found in neither table are ignored.
    """
    valid, field = field_dict.FIDToField(int(self.fid), '')
    if valid == False:
        return
    if self.value == '0':
        return

    cfg = self.tickconfig
    is_stock = self.prodtype == ProdType.STOCK

    hits = np.where((cfg.stkQuotes if is_stock else cfg.futQuotes) == field)
    if len(hits[0]) != 0:
        self.updateQuote(hits, field)
        return

    hits = np.where((cfg.stkTrades if is_stock else cfg.futTrades) == field)
    if len(hits[0]) != 0:
        self.updateTrade(hits, field)

def getOrderBookTrade(self):
    """Return the (quotes, trades-entry) pair for the current self.ric.

    On first use of a ric, a quotes array is obtained from
    self.tickconfig.getOrderBookArray(self.prodtype) and a trades entry
    ``[trades_array, 0]`` is created from self.tickconfig.getTradesArray();
    the second item is the starting accumulated volume. Later calls return
    the same stored objects.
    """
    # "in" replaces dict.has_key(), which was removed in Python 3.
    if self.ric not in self.allquotes:
        self.allquotes[self.ric] = self.tickconfig.getOrderBookArray(self.prodtype)
        trades = self.tickconfig.getTradesArray()
        self.alltrades[self.ric] = [trades, 0]

    return self.allquotes[self.ric], self.alltrades[self.ric]

def parseline(self, line):
    """Parse one raw record, character by character.

    Layout consumed by the state machine, in order:
      1. sequence number, terminated by ','
      2. timestamp, terminated by ','; only the text after the last space is
         kept and it must be exactly 12 characters in '%H:%M:%S.%f' form
      3. RIC, terminated by ','; mapped through tickconfig.replaceRic()
      4. update type, terminated by '|'
      5. any number of 'fid,value|' pairs; each completed pair is passed to
         updateResult()

    Returns None once the line has been marked to be skipped (bad timestamp
    or unsubscribed RIC) and a further character is read. Otherwise returns
    the tuple (ric, timestamp, quotes, trades, levelsUpdated, tradeUpdate).

    NOTE(review): a final pair is only processed when its value is followed
    by '|'; confirm callers pass lines that still carry the trailing '|'.
    """
    self.tradeUpdate = False
    self.levelsUpdated = []
    self.state = ReadState.SEQ_NUM
    self.fvstate = fvstate.FID
    self.token = ''

    for ch in line:
        # SEQ_NUM
        if self.state == ReadState.SEQ_NUM:
            if ch != ',':
                self.token += ch
            else:
                self.seq_num = int(self.token)
                self.state = ReadState.TIMESTAMP
                self.token = ''

        # TIMESTAMP
        elif self.state == ReadState.TIMESTAMP:
            if ch == ' ':
                # Drop whatever preceded the space; only the time part is used.
                self.token = ''
            elif ch != ',':
                self.token += ch
            else:
                if len(self.token) != 12:
                    # The token used to be passed as a second print argument,
                    # so "%s" was printed literally; interpolate it instead.
                    print("Invalid timestamp format. %s. skipping line.\n" % self.token)
                    self.state = ReadState.SKIPLINE
                else:
                    self.timestamp = datetime.strptime(self.token, '%H:%M:%S.%f')
                    self.state = ReadState.RIC
                self.token = ''

        # RIC
        elif self.state == ReadState.RIC:
            if ch != ',':
                self.token += ch
            else:
                self.ric = self.token
                self.token = ''
                self.ric, self.depth = self.tickconfig.replaceRic(self.ric)
                self.prodtype = self.tickconfig.getProdType(self.ric)
                if self.tickconfig.subscribed(self.ric):
                    self.state = ReadState.UPDATE_TYPE
                    # Load the arrays and accumulated volume kept for this RIC.
                    self.quotes, trades = self.getOrderBookTrade()
                    self.trades = trades[0]
                    self.acvol = trades[1]
                else:
                    self.state = ReadState.SKIPLINE

        # UPDATE_TYPE
        elif self.state == ReadState.UPDATE_TYPE:
            if ch != '|':
                self.token += ch
            else:
                self.update_type = self.token
                self.token = ''
                self.state = ReadState.FVPAIRS

        # SKIPLINE
        elif self.state == ReadState.SKIPLINE:
            return None

        # FV PAIRS
        elif self.state == ReadState.FVPAIRS:
            # FID
            if self.fvstate == fvstate.FID:
                if ch != ',':
                    if not ch.isdigit():
                        # A non-digit where a fid is expected is treated as a
                        # continuation of the previous value.
                        # NOTE(review): the '|' that ended that value and any
                        # digits already in self.token are dropped, and
                        # self.value is a float after updateQuote(), which
                        # would make this concatenation fail -- confirm intent.
                        self.token = self.value + ch
                        self.fvstate = fvstate.FIDVALUE
                    else:
                        self.token += ch
                else:
                    self.fid = self.token
                    self.token = ''
                    self.fvstate = fvstate.FIDVALUE

            # FIDVALUE
            elif self.fvstate == fvstate.FIDVALUE:
                if ch != '|':
                    self.token += ch
                else:
                    self.value = self.token
                    self.token = ''
                    self.fvstate = fvstate.FID
                    # TODO set value
                    self.updateResult()

    return self.ric, self.timestamp, self.quotes, self.trades, self.levelsUpdated, self.tradeUpdate

谢谢。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值