Python 模板

最近代码的重复量太高了，找个时间整理一下，生成一个方便的模块模板。


#!/usr/bin/env python
#-*- coding: utf-8 -*-

import sys
# Python 2-only idiom: ``reload`` is a builtin only on Python 2, and
# ``sys.setdefaultencoding`` is normally not reachable until ``sys`` has been
# reloaded. The call makes implicit str<->unicode conversions use UTF-8
# instead of the ASCII default.
# NOTE(review): both lines fail on Python 3 -- confirm the target interpreter
# before reusing this template.
reload(sys)
sys.setdefaultencoding('utf-8')


import os
import traceback
import logging
import multiprocessing
import re

import jieba
import ConfigParser
import datetime


def myprocess(num, indir, titles, lock, max_lines=10000):
    """Fold one input shard into the shared ``title -> [user ids]`` mapping.

    Reads the file ``indir/<num>`` line by line, evaluates each line into a
    sequence, and appends the record's user id to the list stored under the
    record's title in ``titles``.

    Args:
        num: Shard number; also the file name inside ``indir``.
        indir: Directory holding the shard files.
        titles: Shared mapping (a ``multiprocessing.Manager().dict()`` proxy
            in ``func``) from title to list of user ids.
        lock: Lock guarding the read-modify-write on ``titles``.
        max_lines: 1-based line number at which reading stops; that line
            itself is NOT processed (same as the original hard-coded 10000).
            Pass ``None`` to read the whole file.

    Returns:
        None. Any ``Exception`` is printed with its traceback and swallowed,
        so a failing shard does not take the worker pool down.
    """
    try:
        with open(indir + os.sep + str(num), 'r') as fin:
            for lineno, line in enumerate(fin, 1):
                # Progress marker every 100 lines.
                if lineno % 100 == 0:
                    print('%s %s' % (num, lineno))
                if lineno == max_lines:
                    break
                # NOTE(review): eval() executes arbitrary code found in the
                # input file. Only safe for trusted data; if the lines are
                # plain Python literals, ast.literal_eval is the safer choice.
                data = eval(line.strip())
                # presumably column 21 is the user id and column 8 the title
                # -- verify against the data schema.
                us_id = int(data[21])
                title = data[8]
                if title.strip() == '':
                    continue
                with lock:
                    # A manager dict proxy hands back a *copy* of the stored
                    # list, so an in-place append would never reach the
                    # manager process. Re-assign the updated list instead.
                    ids = titles.get(title, [])
                    ids.append(us_id)
                    titles[title] = ids
        print('%s END' % (num,))
    except Exception:
        traceback.print_exc()


def func():
    """Run ``myprocess`` over shards 0..19 in a pool and dump the result.

    Writes one ``title<TAB>[ids]`` line per title to ``./titles1119`` and
    prints every id list that contains more than one entry.
    """
    indir = '/root/data/bids/data_all'
    # Despite the original name ("outdir") this path is opened as a single
    # output file, so no directory is created for it.
    outfile = './titles1119'
    start, end, add = 0, 20, 1

    manager = multiprocessing.Manager()
    lock = manager.Lock()
    titles = manager.dict()

    # Start the worker pool.
    pool = multiprocessing.Pool(processes=16)
    for num in range(start, end, add):
        pool.apply_async(myprocess, [num, indir, titles, lock])
    pool.close()
    pool.join()

    # Copy out of the proxy once so the loop below does no IPC round-trips.
    titles = dict(titles)
    with open(outfile, 'w') as fout:
        for title in titles:
            fout.write('%s\t%s\n' % (title, titles[title]))
            if len(titles[title]) > 1:
                print(titles[title])


if __name__ == '__main__':
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)
    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s',
                        filename=program + '.log', filemode='a')
    logging.root.setLevel(level=logging.INFO)
    logger.info("start running %s" % ' '.join(sys.argv))
    func()
    logger.info("end\t running %s" % ' '.join(sys.argv))





阅读更多
上一篇python 多进程共享变量
下一篇python处理文本又一坑--又是编码
想对作者说点什么? 我来说一句

python-django模板

2017年02月11日 18KB 下载

没有更多推荐了,返回首页

关闭
关闭