# Concurrent read: works around timeouts that may occur.
#! coding: utf-8
import os
import random
import sys
import time
from concurrent.futures import (
    FIRST_COMPLETED,
    FIRST_EXCEPTION,
    ProcessPoolExecutor,
    wait,
)
from io import BytesIO

import requests
def url_read(url, timeout=None):
    """Fetch ``url`` once and report the outcome as a plain dict.

    Any exception raised while fetching is captured in the returned dict
    instead of propagating, so the caller can inspect each attempt.

    Args:
        url: Address passed to ``requests.get``.
        timeout: Value used for both elements of the ``timeout`` tuple given
            to ``requests.get``; ``None`` is passed through unchanged.

    Returns:
        A dict with the keys:
            ``pid``     - id of the process that ran this call.
            ``status``  - ``'success'`` or ``'fail'``.
            ``latency`` - elapsed wall-clock seconds for the attempt.
            ``msg``     - ``str()`` of the exception on failure, else ``''``.
            ``result``  - response body on success, else ``None``.
    """
    pid = os.getpid()
    msg = ''
    status = 'unknown'
    result = None
    started = time.time()
    try:
        response = requests.get(url, timeout=(timeout, timeout))
        # The body is used directly; wrapping it in BytesIO only to read it
        # back produced an identical copy.
        result = response.content
        status = 'success'
    except Exception as exc:  # ``as`` form is valid on Python 2.6+ and 3
        msg = str(exc)
        status = 'fail'
    latency = time.time() - started
    return {
        'pid': pid,
        'status': status,
        'latency': latency,
        'msg': msg,
        'result': result,
    }
class ConReader(object):
    """Read a URL with several redundant attempts and keep the first success.

    Each call to ``read`` submits the same ``url_read`` job several times to a
    process pool and returns as soon as one attempt reports success.
    """

    def __init__(self, worker_num=2, quiet=True):
        """Create the worker pool.

        Args:
            worker_num: Maximum number of worker processes in the pool.
            quiet: When true, per-attempt progress messages are suppressed.
        """
        super(ConReader, self).__init__()
        self.pool = ProcessPoolExecutor(max_workers=worker_num)
        self.__quiet = quiet
        # Latency of the most recent successful attempt; None until then.
        self.__latency = None

    def __del__(self):
        # ``pool`` is absent when __init__ failed before creating it.
        pool = getattr(self, 'pool', None)
        if pool is not None:
            pool.shutdown()

    def __prompt(self, msg):
        """Write ``msg`` to stdout unless the reader is in quiet mode."""
        if self.__quiet:
            return
        sys.stdout.write(msg)

    def read(self, url, timeout=1, concurrent=2):
        """Return the body of ``url`` from the first attempt that succeeds.

        Args:
            url: Address handed to ``url_read``.
            timeout: Passed to ``url_read`` and also used as the polling
                interval while waiting for attempts to complete.
            concurrent: Number of identical attempts to submit.

        Returns:
            The ``result`` field of the first successful attempt.

        Raises:
            Exception: When every attempt has failed (or none was submitted);
                the message is the individual failure messages joined by
                ``';'``.
        """
        self.__latency = None
        pending = set(
            self.pool.submit(url_read, url, timeout)
            for _ in range(concurrent)
        )
        errors = []
        while pending:
            # Wait only on attempts that are still outstanding. Waiting on the
            # full list again would return immediately once any attempt had
            # finished, turning this loop into a busy spin.
            finished, pending = wait(
                pending, timeout=timeout, return_when=FIRST_COMPLETED)
            for future in finished:
                outcome = future.result()
                if outcome['status'] == 'success':
                    self.__prompt('worker:%s status:%s, latency:%s\n' % (
                        outcome['pid'], outcome['status'], outcome['latency']))
                    self.__latency = outcome['latency']
                    # Best effort: drop attempts that have not started yet so
                    # they do not occupy workers needed by later reads.
                    for leftover in pending:
                        leftover.cancel()
                    return outcome['result']
                self.__prompt('worker:%s status:%s, msg:%s\n' % (
                    outcome['pid'], outcome['status'], outcome['msg']))
                errors.append(outcome['msg'])
        raise Exception(';'.join(errors))

    def latency(self):
        """Return the latency of the last successful ``read``, or ``None``."""
        return self.__latency
# Command-line driver: ``script.py [run_loop] [timeout]``.
# Reads a fixed test URL ``run_loop`` times (default 1) with the given
# timeout in seconds (default 1.0), printing a traceback for any failed loop.
if __name__ == '__main__':
    url = 'https://www.baidu.com/img/bd_logo1.png'
    run_loop = int(sys.argv[1]) if len(sys.argv) > 1 else 1
    timeout = float(sys.argv[2]) if len(sys.argv) > 2 else 1.0
    reader = ConReader(quiet=False)
    for i in range(run_loop):
        # Single-argument parenthesised print behaves the same on Python 2
        # (print statement) and Python 3 (print function).
        print('---- loop:%s ------' % i)
        try:
            reader.read(url, timeout)
        except Exception as e:
            import traceback
            traceback.print_exc()
            print(e)