Python 异步 IO 框架

线程池的使用+requests模块+回调函数

"""

可以实现并发

但是,请求发送出去后和返回之前,中间时期线程空闲

编写方式:

- 直接返回处理

- 通过回调函数处理

"""

########### 编写方式一 ###########

"""

from concurrent.futures import ThreadPoolExecutor

import requests

import time

def task(url):

response = requests.get(url)

print(url,response)

# 写正则表达式

pool = ThreadPoolExecutor(7)

url_list = [

'http://www.cnblogs.com/wupeiqi',

'http://huaban.com/favorite/beauty/',

'http://www.bing.com',

'http://www.zhihu.com',

'http://www.sina.com',

'http://www.baidu.com',

'http://www.autohome.com.cn',

]

for url in url_list:

pool.submit(task,url)

pool.shutdown(wait=True)

"""

########### 编写方式二 ###########

from concurrent.futures import ThreadPoolExecutor

import requests
import time


def task(url):
    """
    Download the page at *url*.

    :param url: URL to fetch
    :return: the ``requests.Response`` object (consumed by the callback)
    """
    response = requests.get(url)
    return response


def done(future, *args, **kwargs):
    # Callback run when a submitted task finishes.
    # future.result() returns task()'s return value (and would re-raise
    # any exception the task raised).
    response = future.result()
    print(response.status_code, response.content)


pool = ThreadPoolExecutor(7)  # at most 7 worker threads

url_list = [
    'http://www.cnblogs.com/wupeiqi',
    'http://huaban.com/favorite/beauty/',
    'http://www.bing.com',
    'http://www.zhihu.com',
    'http://www.sina.com',
    'http://www.baidu.com',
    'http://www.autohome.com.cn',
]

for url in url_list:
    v = pool.submit(task, url)
    v.add_done_callback(done)  # attach the callback to the Future

pool.shutdown(wait=True)  # block until every submitted task has completed

进程池的使用+requests模块+回调函数

"""

可以实现并发

但是,请求发送出去后和返回之前,中间时期进程空闲

编写方式:

- 直接返回处理

- 通过回调函数处理

"""

########### 编写方式一 ###########

"""

from concurrent.futures import ProcessPoolExecutor

import requests

import time

def task(url):

response = requests.get(url)

print(url,response)

# 写正则表达式

pool = ProcessPoolExecutor(7)

url_list = [

'http://www.cnblogs.com/wupeiqi',

'http://huaban.com/favorite/beauty/',

'http://www.bing.com',

'http://www.zhihu.com',

'http://www.sina.com',

'http://www.baidu.com',

'http://www.autohome.com.cn',

]

for url in url_list:

pool.submit(task,url)

pool.shutdown(wait=True)

"""

########### 编写方式二 ###########

from concurrent.futures import ProcessPoolExecutor

import requests
import time


def task(url):
    """
    Download the page at *url* in a worker process.

    :param url: URL to fetch
    :return: the ``requests.Response`` object (must be picklable to cross
             the process boundary)
    """
    response = requests.get(url)
    return response


def done(future, *args, **kwargs):
    # Callback run in the parent process when a task finishes.
    response = future.result()
    print(response.status_code, response.content)


# NOTE(review): on platforms that spawn workers (e.g. Windows/macOS) this
# module-level pool should live under `if __name__ == '__main__':` — confirm
# the intended target platform.
pool = ProcessPoolExecutor(7)  # at most 7 worker processes

url_list = [
    'http://www.cnblogs.com/wupeiqi',
    'http://huaban.com/favorite/beauty/',
    'http://www.bing.com',
    'http://www.zhihu.com',
    'http://www.sina.com',
    'http://www.baidu.com',
    'http://www.autohome.com.cn',
]

for url in url_list:
    v = pool.submit(task, url)
    v.add_done_callback(done)  # attach the callback to the Future

pool.shutdown(wait=True)  # block until every submitted task has completed

asyncio + aiohttp

import asyncio

"""

@asyncio.coroutine

def task():

print('before...task......')

yield from asyncio.sleep(5) # 发送Http请求,支持TCP获取结果..

print('end...task......')

tasks = [task(), task()]

loop = asyncio.get_event_loop()

loop.run_until_complete(asyncio.gather(*tasks))

loop.close()

"""

"""

import asyncio

@asyncio.coroutine

def task(host, url='/'):

print('start',host,url)

reader, writer = yield from asyncio.open_connection(host, 80)

request_header_content = "GET %s HTTP/1.0\r\nHost: %s\r\n\r\n" % (url, host,)

request_header_content = bytes(request_header_content, encoding='utf-8')

writer.write(request_header_content)

yield from writer.drain()

text = yield from reader.read()

print('end',host, url, text)

writer.close()

tasks = [

task('www.cnblogs.com', '/wupeiqi/'),

task('dig.chouti.com', '/pic/show?nid=4073644713430508&lid=10273091')

]

loop = asyncio.get_event_loop()

results = loop.run_until_complete(asyncio.gather(*tasks))

loop.close()

"""

"""

import aiohttp

import asyncio

@asyncio.coroutine

def fetch_async(url):

print(url)

response = yield from aiohttp.request('GET', url)

print(url, response)

response.close()

tasks = [fetch_async('http://www.baidu.com/'), fetch_async('http://www.chouti.com/')]

event_loop = asyncio.get_event_loop()

results = event_loop.run_until_complete(asyncio.gather(*tasks))

event_loop.close()

"""

import asyncio

import requests


async def task(func, *args):
    """
    Run the blocking callable *func(*args)* (e.g. ``requests.get``) in the
    default thread-pool executor, so the event loop keeps running while the
    blocking call is in flight.

    :param func: blocking callable to run
    :param args: positional arguments passed through to *func*
    """
    print(func, args)
    loop = asyncio.get_running_loop()
    # Offload the blocking call, e.g. requests.get('http://www.cnblogs.com/wupeiqi/')
    future = loop.run_in_executor(None, func, *args)
    response = await future
    print(response.url, response.content)


async def _main():
    # Run both downloads concurrently.
    await asyncio.gather(
        task(requests.get, 'http://www.cnblogs.com/wupeiqi/'),
        task(requests.get, 'http://dig.chouti.com/pic/show?nid=4073644713430508&lid=10273091'),
    )


# @asyncio.coroutine / yield-from coroutines were removed in Python 3.11;
# async/await + asyncio.run() is the supported equivalent.
asyncio.run(_main())

不管是何种异步IO框架,内部的原码都是类似的,设置非堵塞IO,循环检测socket对象的状态,就形成了异步IO

IO多路复用,就是用select监听多个socket对象

异步IO,就是非堵塞IO加IO多路复用

而我们自己就可以编写出一个 自定义异步IO框架

import socket

import select

# ########################## HTTP请求本质,阻塞 ##########################

"""

sk = socket.socket()

# 1.连接

sk.connect(('www.baidu.com',80,)) # IO阻塞

print('连接成功了...')

# 2. 连接成功发送消息

sk.send(b'GET / HTTP/1.0\r\nHost:www.baidu.com\r\n\r\n')

# sk.send(b'POST / HTTP/1.0\r\nHost:www.baidu.com\r\n\r\nk1=v1&k2=v2')

# 3. 等待着服务端响应

data = sk.recv(8096) # IO阻塞

print(data)

# 关闭连接

sk.close()

"""

# ########################## HTTP请求本质,非阻塞 ##########################

"""

sk = socket.socket()

sk.setblocking(False)

# 1.连接

try:

sk.connect(('www.baidu.com',80,)) # IO阻塞

print('连接成功了...')

except BlockingIOError as e:

print(e)

# 2. 连接成功发送消息

sk.send(b'GET / HTTP/1.0\r\nHost:www.baidu.com\r\n\r\n')

# sk.send(b'POST / HTTP/1.0\r\nHost:www.baidu.com\r\n\r\nk1=v1&k2=v2')

# 3. 等待着服务端响应

data = sk.recv(8096) # IO阻塞

print(data)

# 关闭连接

sk.close()

"""

class HttpRequest:
    """One in-flight request: a non-blocking client socket plus metadata.

    Implements ``fileno()`` so instances can be handed straight to
    ``select.select()``, which duck-types on that method.
    """

    def __init__(self, sk, host, callback):
        self.socket = sk          # the (non-blocking) socket object
        self.host = host          # target host, used to build the GET request
        self.callback = callback  # invoked with the HttpResponse when done

    def fileno(self):
        # Delegate to the underlying socket's file descriptor.
        return self.socket.fileno()

class HttpResponse:
    """Minimal parser for a raw HTTP response byte string."""

    def __init__(self, recv_data):
        self.recv_data = recv_data  # full raw response bytes
        self.header_dict = {}       # header name -> value (value keeps any leading space)
        self.body = None            # body bytes, set by initialize()
        self.initialize()

    def initialize(self):
        """Split the raw bytes into ``header_dict`` and ``body``.

        :raises ValueError: if the data contains no blank line separator.
        """
        headers, body = self.recv_data.split(b'\r\n\r\n', 1)
        self.body = body
        for h in headers.split(b'\r\n'):
            h_str = str(h, encoding='utf-8')
            v = h_str.split(':', 1)
            if len(v) == 2:
                # The status line ('HTTP/1.0 200 OK') has no ':' and is skipped.
                self.header_dict[v[0]] = v[1]

class AsyncRequest:
    """A tiny async HTTP client: non-blocking sockets + select() polling."""

    def __init__(self):
        self.conn = []        # all in-flight requests (watched for readability)
        self.connection = []  # requests whose connect() has not yet completed
                              # (watched for writability)

    def add_request(self, host, callback):
        """Begin a non-blocking connection to *host*:80.

        :param host: host name to connect to
        :param callback: called with the HttpResponse once the reply arrives
        """
        sk = socket.socket()
        sk.setblocking(0)
        try:
            sk.connect((host, 80,))
        except BlockingIOError:
            # Expected: a non-blocking connect returns immediately; select()
            # reporting the socket writable signals completion.
            pass
        request = HttpRequest(sk, host, callback)
        self.conn.append(request)
        self.connection.append(request)

    def run(self):
        """Event loop: poll with select() until every request has completed."""
        while True:
            rlist, wlist, elist = select.select(self.conn, self.connection, self.conn, 0.05)

            for w in wlist:
                # Writable => the connection to the server is established.
                print(w.host, '连接成功...')
                tpl = "GET / HTTP/1.0\r\nHost:%s\r\n\r\n" % (w.host,)
                w.socket.send(bytes(tpl, encoding='utf-8'))
                self.connection.remove(w)

            for r in rlist:
                # Readable => response data; r is an HttpRequest.
                recv_data = bytes()
                while True:
                    try:
                        chunk = r.socket.recv(8096)
                        if not chunk:
                            break  # peer closed the connection: response complete
                        recv_data += chunk
                    except Exception:
                        # Non-blocking recv raises when no buffered data remains.
                        break
                response = HttpResponse(recv_data)
                r.callback(response)
                r.socket.close()
                self.conn.remove(r)

            if len(self.conn) == 0:
                break

def f1(response):
    """Callback for a finished request: persist to file (demo just prints).

    :param response: HttpResponse whose parsed headers are reported
    """
    print('保存到文件', response.header_dict)

def f2(response):
    """Callback for a finished request: persist to database (demo just prints).

    :param response: HttpResponse whose parsed headers are reported
    """
    print('保存到数据库', response.header_dict)

# Driver: queue one request per host, each with its own completion callback,
# then run the select() loop until all of them finish.
url_list = [
    {'host': 'www.baidu.com', 'callback': f1},
    {'host': 'cn.bing.com', 'callback': f2},
    {'host': 'www.cnblogs.com', 'callback': f2},
]

req = AsyncRequest()
for item in url_list:
    req.add_request(item['host'], item['callback'])

req.run()

note

一个东西你只会用,不会懂得原理的时候,心里没有底

很多人都停留在只会用的阶段

多线程和多进程的区别,

python 中同一个时刻只能有一个线程进入CPU执行

IO密集型用线程,计算密集型用进程

爬虫性能相关

- 单线程堵塞

requests.get(url)  # 一直等待回复,堵塞

事件循环

IO 多路复用:r,w,e ==> 监听多个socket对象 利用其特性可以开发很多异步模块

异步IO, 非堵塞的socket+IO多路复用

- 非堵塞的socket

- select[自己对象],w,r

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值