Concurrent HTTP reader

并发读取:同时发起多个相同的请求,取最先成功的结果,以规避单个请求可能出现的超时。

#! coding: utf-8
import sys
import os
import random
import time
import requests
from io import BytesIO
import sys
from concurrent.futures import ProcessPoolExecutor, wait, FIRST_EXCEPTION



def url_read(url, timeout = None):
    """Fetch ``url`` with a single HTTP GET and describe the outcome as a dict.

    The function never raises: any exception from the request is caught and
    reported through the returned record, so a caller that runs it in a
    worker only has to inspect ``status``.

    Args:
        url: Address to fetch.
        timeout: Value passed to ``requests.get`` as both elements of its
            ``(connect, read)`` timeout tuple.

    Returns:
        A dict with the keys:
            ``pid``     - id of the process that performed the request.
            ``status``  - ``'success'`` or ``'fail'``.
            ``latency`` - wall-clock seconds spent in this call.
            ``msg``     - ``str()`` of the caught exception, ``''`` on success.
            ``result``  - response body (``r.content``), ``None`` on failure.
    """
    pid = os.getpid()
    msg = ''
    status = 'unknown'
    result = None
    started = time.time()
    try:
        r = requests.get(url, timeout=(timeout, timeout))
        # NOTE(review): no HTTP status check is made here, so an error
        # response (4xx/5xx) is presumably still reported as 'success'.
        result = r.content
        status = 'success'
    except Exception as e:
        # Deliberately broad: the failure is handed back as data instead of
        # propagating out of the worker.
        msg = str(e)
        status = 'fail'

    latency = time.time() - started
    return {'pid': pid, 'status': status, 'latency': latency, 'msg': msg, 'result': result}

class ConReader(object):
    """Issue the same HTTP read several times in parallel and keep the first success.

    Requests are executed by ``url_read`` in a ``ProcessPoolExecutor`` owned
    by the instance. The latency of the last successful read is available
    through ``latency()``.
    """

    def __init__(self, worker_num = 2, quiet = True):
        """Create the worker pool.

        Args:
            worker_num: Number of worker processes in the pool.
            quiet: When true, per-request progress lines are not written to
                stdout.
        """
        super(ConReader, self).__init__()
        self.pool = ProcessPoolExecutor(max_workers = worker_num)
        self.__quiet = quiet
        self.__latency = None

    def __del__(self):
        # ``pool`` is missing when __init__ failed before creating it; do not
        # turn that into a second error during garbage collection.
        pool = getattr(self, 'pool', None)
        if pool is not None:
            pool.shutdown()

    def __prompt(self, msg):
        """Write ``msg`` to stdout unless the reader was created quiet."""
        if self.__quiet:
            return
        sys.stdout.write(msg)

    def read(self, url, timeout = 1, concurrent = 2):
        """Fetch ``url`` with ``concurrent`` parallel requests; return the first success.

        Args:
            url: Address to fetch.
            timeout: Per-request timeout handed to ``url_read``; also the
                longest single wait between checks for finished requests.
            concurrent: Number of identical requests to submit.

        Returns:
            The ``result`` field of the first request whose status is
            ``'success'``.

        Raises:
            Exception: When every request failed; the message is the
                ``';'``-joined failure messages in completion order.
        """
        # Imported here because the file's top-level import list does not
        # provide FIRST_COMPLETED. The ``concurrent`` parameter does not
        # interfere: the import statement resolves the package by name.
        from concurrent.futures import FIRST_COMPLETED

        self.__latency = None
        pending = set(self.pool.submit(url_read, url, timeout) for _ in range(concurrent))
        errors = []

        while pending:
            # Wait only on unfinished futures. Re-waiting on ones that are
            # already done would return immediately and spin the loop.
            finished, pending = wait(pending, timeout = timeout, return_when = FIRST_COMPLETED)
            for f in finished:
                outcome = f.result()
                if outcome['status'] == 'success':
                    self.__prompt('worker:%s status:%s, latency:%s\n' % (outcome['pid'], outcome['status'], outcome['latency']))
                    self.__latency = outcome['latency']
                    # Drop duplicates that have not started yet so they do
                    # not occupy the pool; running ones cannot be cancelled.
                    for leftover in pending:
                        leftover.cancel()
                    return outcome['result']
                self.__prompt('worker:%s status:%s, msg:%s\n' % (outcome['pid'], outcome['status'], outcome['msg']))
                errors.append(outcome['msg'])

        raise Exception(';'.join(errors))

    def latency(self):
        """Return the latency of the last successful ``read``, or ``None``."""
        return self.__latency



if __name__ == '__main__':
    # Demo driver. Usage: <script> [run_loop] [timeout]
    #   run_loop - how many times to read the URL (default 1)
    #   timeout  - timeout in seconds passed to ConReader.read (default 1.0)
    import traceback

    url = 'https://www.baidu.com/img/bd_logo1.png'

    run_loop = int(sys.argv[1]) if len(sys.argv) > 1 else 1
    timeout = float(sys.argv[2]) if len(sys.argv) > 2 else 1.0
    reader = ConReader(quiet = False)

    for i in range(run_loop):
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print('---- loop:%s ------' % i)
        try:
            reader.read(url, timeout)
        except Exception as e:
            # Keep looping after a failed round; report it and move on.
            traceback.print_exc()
            print(e)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值