关闭

Python 模板

102人阅读 评论(0) 收藏 举报

最近代码的重复量太高了,找个时间整理一下,生成一个方便的模块模板。


#!/usr/bin/env python
#-*- coding: utf-8 -*-

import sys
reload(sys)
sys.setdefaultencoding('utf-8')


import ConfigParser
import datetime
import logging
import multiprocessing
import os
import re
import traceback

import jieba

# NOTE: `sys` is imported (and reloaded) by the encoding preamble above this
# block; the entry point at the bottom relies on it.

# Emit a progress line every this many input lines.
_PROGRESS_EVERY = 100
# Stop reading a shard once this line number is reached; the line with this
# number itself is NOT processed.
_MAX_LINES = 10000


def myprocess(num, indir, titles, lock):
    """Collect the ids found in one input shard, grouped by title.

    Reads the file ``<indir>/<num>`` line by line.  Every line is evaluated
    as a Python literal/expression; element 21 is converted to ``int`` and
    used as the id, element 8 is used as the title.  Records whose title is
    empty or whitespace-only are skipped.

    Args:
        num: Shard number; also the name of the file inside ``indir``.
        indir: Directory that holds the shard files.
        titles: Shared mapping ``title -> list of ids`` (a ``Manager().dict()``
            proxy when called from ``func``).
        lock: Lock guarding the read-modify-write on ``titles``.

    Any exception is printed with its traceback and swallowed: the function
    runs under ``Pool.apply_async`` and nobody collects the result, so an
    exception that propagated would vanish silently.
    """
    try:
        with open(indir + os.sep + str(num), 'r') as fin:
            for cnt0, line in enumerate(fin, 1):
                if cnt0 % _PROGRESS_EVERY == 0:
                    print('%s %s' % (num, cnt0))
                if cnt0 == _MAX_LINES:
                    break
                # SECURITY: eval() executes arbitrary code contained in the
                # data file.  Only feed this trusted input; consider
                # ast.literal_eval if the records are plain literals.
                data = eval(line.strip())
                us_id = int(data[21])
                title = data[8]
                if title.strip() == '':
                    continue
                with lock:
                    # Re-assign the key instead of appending in place:
                    # indexing a manager dict proxy returns a *copy* of the
                    # stored list, so ``titles[title].append(...)`` would
                    # modify the copy and the update would be lost.
                    titles[title] = titles.get(title, []) + [us_id]
        print('%s END' % num)
    except Exception:
        traceback.print_exc()


def func():
    """Scan shards 0..19 in a process pool and dump the title -> ids map.

    Each shard is handed to ``myprocess``.  After all workers finish, one
    line ``<title>\\t<list of ids>`` per title is written to ``outdir`` and
    every id list that has more than one entry is also printed to stdout.
    """
    indir = '/root/data/bids/data_all'
    # Despite its name this is the output *file* path, not a directory.
    outdir = './titles1119'
    start, end, add = 0, 20, 1

    manager = multiprocessing.Manager()
    lock = manager.Lock()
    titles = manager.dict()

    # Start the process pool.
    pool = multiprocessing.Pool(processes=16)
    for num in range(start, end, add):
        pool.apply_async(myprocess, [num, indir, titles, lock])
    pool.close()
    pool.join()

    # Copy the proxy into a plain dict once so the loop below does not make
    # a round-trip to the manager process for every access.
    titles = dict(titles)
    with open(outdir, 'w') as fout:
        for title in titles:
            fout.write('%s\t%s\n' % (title, titles[title]))
            if len(titles[title]) > 1:
                print(titles[title])


if __name__ == '__main__':
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)
    # Append to "<script name>.log" next to the current working directory.
    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s',
                        filename=program + '.log', filemode='a')
    logging.root.setLevel(level=logging.INFO)
    logger.info("start running %s" % ' '.join(sys.argv))
    func()
    logger.info("end\t running %s" % ' '.join(sys.argv))





0
0

查看评论
* 以上用户言论只代表其个人观点,不代表CSDN网站的观点或立场
    个人资料
    • 访问:10428次
    • 积分:554
    • 等级:
    • 排名:千里之外
    • 原创:47篇
    • 转载:0篇
    • 译文:0篇
    • 评论:3条
    最新评论