# _*_coding:utf-8_*_
import time, threading, ConfigParser‘‘‘Reader类,继承threading.Thread
@__init__方法初始化
@run方法实现了读文件的操作‘‘‘classReader(threading.Thread):
def __init__(self, file_name, start_pos, end_pos):
super(Reader, self).__init__()
self.file_name=file_name
self.start_pos=start_pos
self.end_pos=end_pos
def run(self):
fd= open(self.file_name, ‘r‘)‘‘‘该if块主要判断分块后的文件块的首位置是不是行首,
是行首的话,不做处理
否则,将文件块的首位置定位到下一行的行首‘‘‘ if self.start_pos != 0:
fd.seek(self.start_pos-1)if fd.read(1) != ‘\n‘:
line=fd.readline()
self.start_pos=fd.tell()
fd.seek(self.start_pos)‘‘‘对该文件块进行处理‘‘‘ while (self.start_pos <=self.end_pos):
line=fd.readline()‘‘‘ dosomthing‘‘‘ self.start_pos =fd.tell()‘‘‘对文件进行分块,文件块的数量和线程数量一致‘‘‘class Partition(object):
def __init__(self, file_name, thread_num):
self.file_name=file_name
self.block_num=thread_num
def part(self):
fd= open(self.file_name, ‘r‘)
fd.seek(0, 2)
pos_list=[]
file_size=fd.tell()
block_size= file_size/self.block_num
start_pos= 0
for i inrange(self.block_num):if i == self.block_num-1:
end_pos= file_size-1pos_list.append((start_pos, end_pos))breakend_pos= start_pos+block_size-1
if end_pos >=file_size:
end_pos= file_size-1
if start_pos >=file_size:breakpos_list.append((start_pos, end_pos))
start_pos= end_pos+1fd.close()returnpos_listif __name__ == ‘__main__‘:‘‘‘读取配置文件‘‘‘ config =ConfigParser.ConfigParser()
config.readfp(open(‘conf.ini‘))
#文件名
file_name= config.get(‘info‘, ‘fileName‘)
#线程数量
thread_num= int(config.get(‘info‘, ‘threadNum‘))
#起始时间
start_time=time.clock()
p=Partition(file_name, thread_num)
t=[]
pos=p.part()
#生成线程for i inrange(thread_num):
t.append(Reader(file_name,*pos[i]))
#开启线程for i inrange(thread_num):
t[i].start()for i inrange(thread_num):
t[i].join()
#结束时间
end_time=time.clock()
print"Cost time is %f" % (end_time - start_time)