没什么技术含量,直接贴代码:
# -*- coding: utf-8 -*-
from csdn.models import csdnuser
#except:
# print u'没有开启django shell'
def d():
    """Import the whole dump file into the database, one ``csdnuser`` per line.

    Each line is decoded as GB18030 and split on whitespace; fields 0, 2 and 4
    become ``username``, ``userpass`` and ``useremail`` (presumably the fields
    in between are separators -- confirm against the dump format).  A line
    that cannot be decoded, or that has too few fields, is echoed to stdout
    and a fixed placeholder record is saved in its place, so exactly one row
    is written per input line.

    A progress message is printed every 10000 rows and a total at the end.
    Errors raised by ``save()`` are not caught and abort the import.
    """
    # Rows saved so far.  Defined up front so the final summary also works
    # for an empty file (the loop body never runs in that case).
    n = 0
    # ``with`` guarantees the file is closed even if ``save()`` raises.
    with open('E:\\sync\\docments\\csdn.sql', 'r') as source:
        for i, all_temp in enumerate(source):
            try:
                # Values are passed to the model untouched: no manual
                # doubling of single quotes.
                # NOTE(review): assumes ``csdnuser.save()`` binds values as
                # SQL parameters (as the Django ORM does), in which case
                # doubled quotes would be stored verbatim -- confirm.
                temp = all_temp.decode('gb18030').split()
                user_temp = csdnuser(username=temp[0],
                                     userpass=temp[2],
                                     useremail=temp[4])
            except (UnicodeError, IndexError):
                # Undecodable bytes or a short/blank line: report it and
                # store the placeholder row instead.
                user_temp = csdnuser(username=u'qtsharp',
                                     userpass=u' ',
                                     useremail=u'qtsharp@qq.com')
                print('wrong: %s\n' % i)
                print(all_temp)
            user_temp.save()
            n = i + 1
            if n % 10000 == 0:
                print(u'已导入%s万个' % (n // 10000))
    # NOTE(review): the author observed memory growing steadily during this
    # loop; nothing in this function retains rows, so presumably the growth
    # comes from the ORM/connection layer (e.g. a debug query log) -- verify.
    print(u'导入完成,共导入%s个。' % n)
发现了一个很严重的问题:内存占用一直很高,而且还在不断增长,应该怎样及时释放内存呢?
暂时想不到好的办法,先改成下面这样,然后设定处理数量,分段进行。
# -*- coding: utf-8 -*-
# The model is only importable when the project environment is set up
# (the message below tells the user the Django shell is not running).
try:
    from csdn.models import csdnuser
except Exception:
    # Deliberately best-effort: report and carry on so the module can still
    # be loaded.  ``Exception`` rather than a bare ``except`` so that
    # Ctrl-C / SystemExit are not swallowed.
    # NOTE(review): could presumably be narrowed to ImportError -- confirm
    # which errors the model import raises when the environment is missing.
    print(u'没有开启django shell')

# Path of the text dump that the import functions read.
file_path = 'E:\\sync\\docments\\csdn.sql'
# Skip num lines
def pass_lines(num, source):
    """Advance *source* past its next *num* lines.

    ``source`` is any object with a ``readline()`` method.  Lines are read
    and discarded; nothing is returned.  If the end of the input is reached
    first, skipping simply stops there.  A ``num`` of zero or less skips
    nothing.
    """
    skipped = 0
    # A counting loop instead of ``range(num)``: on Python 2 ``range`` would
    # build a list of ``num`` integers just to drive the loop.
    while skipped < num:
        if not source.readline():
            # Empty result means end of input; further reads would be no-ops.
            break
        skipped += 1
# Main function
def d(begin_num, limit_num):
    """Import lines ``[begin_num, limit_num)`` of the dump into the database.

    Line numbers are zero-based; ``limit_num`` is an exclusive end index, not
    a count.  The range is cut short at the end of the file.  Each line is
    decoded as GB18030 and split on whitespace; fields 0, 2 and 4 become
    ``username``, ``userpass`` and ``useremail``.  A line that cannot be
    decoded, or that has too few fields, is echoed to stdout and a fixed
    placeholder record is saved in its place.

    A progress message is printed whenever the absolute line number reaches
    a multiple of 10000, and the number of rows actually saved is printed at
    the end.  Errors raised by ``save()`` are not caught and abort the run.
    """
    imported = 0  # rows actually saved in this run
    # ``with`` guarantees the file is closed even if ``save()`` raises.
    with open(file_path, 'r') as source:
        # Position the file at line ``begin_num``.
        pass_lines(begin_num, source)
        i = begin_num
        while i < limit_num:
            all_temp = source.readline()
            if not all_temp:
                # End of file.  This replaces counting the lines up front,
                # which required loading the entire dump into memory.
                break
            try:
                temp = all_temp.decode('gb18030').split()
                user_temp = csdnuser(username=temp[0],
                                     userpass=temp[2],
                                     useremail=temp[4])
            except (UnicodeError, IndexError):
                # Undecodable bytes or a short/blank line: report it and
                # store the placeholder row instead.
                user_temp = csdnuser(username=u'qtsharp',
                                     userpass=u' ',
                                     useremail=u'qtsharp@qq.com')
                print('wrong: %s\n' % i)
                print(all_temp)
            # Write the row.
            user_temp.save()
            imported += 1
            # Progress report, based on the absolute line number.
            n = i + 1
            if n >= 10000 and n % 10000 == 0:
                print(u'已导入%s万个' % (n // 10000))
            i += 1
    # Report what was really imported; ``limit_num - begin_num`` is wrong
    # whenever the requested range runs past the end of the file.
    print(u'导入完成,共导入%s个' % imported)