Python 模板

最近代码的重复量太高了，找个时间整理一下，生成一个方便的模块模板。


#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Script template: fan a per-file job out over a process pool.

Each worker reads one numbered input file, groups user ids by title in a
dict shared through a ``multiprocessing.Manager``, and the parent process
writes the merged result to a single output file.
"""

import sys

# Python 2 only: make utf-8 the implicit str/unicode conversion codec.
# ``reload`` and ``sys.setdefaultencoding`` do not exist on Python 3, where
# text is already unicode, so the hack is skipped there.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('utf-8')

import datetime
import logging
import multiprocessing
import os
import re
import traceback

try:
    import ConfigParser
except ImportError:  # the module was renamed in Python 3
    import configparser as ConfigParser

try:
    # Third-party dependency kept from the original template. Nothing in this
    # file calls it, so a missing install does not block the template.
    import jieba
except ImportError:
    jieba = None


def myprocess(num, indir, titles, lock):
    """Collect ``title -> [user ids]`` from one input file into ``titles``.

    Args:
        num: name of the input file inside ``indir`` (converted with ``str``).
        indir: directory that holds the numbered input files.
        titles: shared mapping updated in place; may be a plain dict or a
            ``multiprocessing.Manager().dict()`` proxy.
        lock: context manager guarding the read-modify-write on ``titles``.

    Every line is evaluated as a Python expression. Element 8 is used as the
    title and element 21 as the user id (names taken from the original
    variables; the record schema is not defined in this file). Lines whose
    title is blank are skipped. Reading stops when the 10000th line is
    reached, so at most 9999 lines are processed.

    Any exception is printed with its traceback instead of being raised,
    because ``Pool.apply_async`` would otherwise drop it silently.
    """
    try:
        # ``with`` guarantees the file is closed even when a line fails.
        with open(indir + os.sep + str(num), 'r') as fin:
            cnt0 = 0
            for line in fin:
                cnt0 += 1
                if cnt0 % 100 == 0:
                    print('%s %s' % (num, cnt0))  # progress report
                if cnt0 == 10000:
                    break
                # SECURITY: eval() executes arbitrary code. Only run this on
                # trusted input; consider ast.literal_eval if the lines are
                # plain literals.
                data = eval(line.strip())
                us_id = int(data[21])
                title = data[8]
                if title.strip() == '':
                    continue
                with lock:
                    # Reassign instead of ``titles[title].append(...)``: a
                    # Manager dict proxy returns a *copy* of the stored list,
                    # so an in-place append would never reach the shared dict.
                    titles[title] = titles.get(title, []) + [us_id]
        print('%s END' % num)
    except Exception:
        traceback.print_exc()


def func():
    """Run ``myprocess`` over input files 0..19 and dump the merged result.

    Writes one ``title<TAB>ids`` line per title to the output file and prints
    the id list of every title that has more than one user id.
    """
    indir = '/root/data/bids/data_all'
    outdir = './titles1119'  # despite the name this is the output *file* path

    start, end, add = 0, 20, 1

    manager = multiprocessing.Manager()
    lock = manager.Lock()
    titles = manager.dict()

    # start the process pool
    pool = multiprocessing.Pool(processes=16)
    for num in range(start, end, add):
        pool.apply_async(myprocess, [num, indir, titles, lock])
    pool.close()
    pool.join()

    # Copy out of the proxy once so the loop below does no IPC round-trips.
    titles = dict(titles)
    with open(outdir, 'w') as fout:
        for title in titles:
            fout.write('%s\t%s\n' % (title, titles[title]))
            if len(titles[title]) > 1:
                print(titles[title])


if __name__ == '__main__':
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)
    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s',
                        filename=program + '.log',
                        filemode='a')
    logging.root.setLevel(level=logging.INFO)
    logger.info("start running %s", ' '.join(sys.argv))
    func()
    logger.info("end\t running %s", ' '.join(sys.argv))
 




评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值