# Python: read a file in chunks with multiple processes

# -*- coding: utf-8 -*-

import urlparse
import datetime
import os
from multiprocessing import Process,Queue,Array,RLock

# Number of worker processes to spawn.
WORKERS = 6
# Bytes per worker chunk; populated in main() from FILE_SIZE and WORKERS.
BLOCK_SIZE = 0
# Total size of the input file in bytes; populated by getFilesize().
FILE_SIZE = 0
# Path of the log file to be split among the workers.
FILE_NAME = 'try.log'

def getFilesize(file):
    """Record the size of *file* (in bytes) in the module global FILE_SIZE.

    Also returns the size so callers need not read the global.
    (Parameter name kept as ``file`` for backward compatibility, even
    though it shadows the builtin.)
    """
    global FILE_SIZE
    # os.path.getsize avoids opening the file just to seek to its end,
    # and cannot leak a file handle.
    FILE_SIZE = os.path.getsize(file)
    return FILE_SIZE

def process_found(pid, array, rlock):
    """Worker body: claim the next byte range of FILE_NAME and copy it out.

    Under *rlock*, reads the shared cursor ``array[0]`` as the start of the
    range, advances the tentative end (start + BLOCK_SIZE) to the next
    newline so no line is split between workers, and publishes the new
    cursor.  Then, outside the lock, copies whole lines from that range
    into a per-worker file ``tmp_pid<pid>_jobs``.

    Fixes over the original: Python 3 syntax, no leaked file handle on the
    early-exit path (``with`` closes everything), and end-of-file is a
    plain early return instead of a raised exception.
    """
    global FILE_SIZE, BLOCK_SIZE
    with open(FILE_NAME, 'rb') as fs:
        with rlock:
            begin = array[0]
            end = begin + BLOCK_SIZE
            print(begin, end)
            if begin >= FILE_SIZE:
                # Nothing left to claim: leave the cursor as-is and exit.
                print('begin', begin)
                return
            if end < FILE_SIZE:
                # Round the end of the chunk up to the next line boundary.
                fs.seek(end)
                fs.readline()
                end = fs.tell()
            if end >= FILE_SIZE:
                end = FILE_SIZE
            array[0] = end
            print('-------------', begin, end)

        # Copy the claimed [begin, end) range line by line; the lock is
        # released, so other workers can claim their ranges concurrently.
        fs.seek(begin)
        pos = begin
        with open('tmp_pid' + str(pid) + '_jobs', 'wb') as fd:
            while pos < end:
                fd.write(fs.readline())
                pos = fs.tell()

def main():
    """Split FILE_NAME into WORKERS byte ranges and process them in parallel.

    Each worker claims its range via the shared cursor ``array[0]``,
    serialized by ``rlock``, and writes its lines to its own temp file.
    """
    global FILE_SIZE, BLOCK_SIZE
    getFilesize(FILE_NAME)
    # Floor division: BLOCK_SIZE must be an int (fs.seek rejects floats on
    # Python 3); process_found's newline alignment absorbs the remainder.
    BLOCK_SIZE = FILE_SIZE // WORKERS
    print(FILE_SIZE, BLOCK_SIZE)

    rlock = RLock()
    # array[0] is the shared "next unclaimed byte" cursor.
    array = Array('l', WORKERS)
    array[0] = 0

    workers = [Process(target=process_found, args=(i, array, rlock))
               for i in range(WORKERS)]
    for p in workers:
        p.start()
    for p in workers:
        p.join()

# Entry-point guard: child processes re-import this module under the
# multiprocessing spawn start method, so main() must only run when the
# file is executed directly.
if __name__ == '__main__':
    main()


 

# (Scraped blog-page boilerplate removed: like/bookmark counters and
#  payment-widget text accidentally captured with the article.)