一、使用同步阻塞io完成http请求
import socket
from urllib.parse import urlparse
def get_url(url):
    """Fetch *url* over a raw blocking socket and print the HTTP response.

    Demonstrates synchronous blocking I/O: the call blocks on connect(),
    on the send, and on every recv() until the server closes the
    connection (we ask for ``Connection: close``).
    """
    parts = urlparse(url)
    host = parts.hostname          # netloc may contain ":port"; hostname never does
    path = parts.path or "/"       # an empty path means the site root
    port = parts.port or 80        # honor an explicit port in the URL; default HTTP

    # The with-statement closes the socket even if an exception is raised.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client:
        client.connect((host, port))
        # sendall() keeps writing until the whole request is out; plain
        # send() may transmit only a prefix of the buffer.
        client.sendall("GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(path, host).encode("utf8"))
        data = b""
        while True:
            chunk = client.recv(1024)
            if not chunk:  # empty read == peer closed the connection
                break
            data += chunk
    print(data.decode("utf8"))


if __name__ == "__main__":
    get_url("http://www.baidu.com")
二、使用同步非阻塞式io完成http请求
import socket
from urllib.parse import urlparse
def get_url(url):
    """Fetch *url* with a non-blocking socket, busy-waiting on readiness.

    Demonstrates synchronous non-blocking I/O: instead of sleeping inside
    the kernel, the CPU spins in retry loops until the socket is ready.
    """
    parts = urlparse(url)
    host = parts.hostname          # netloc may contain ":port"; hostname never does
    path = parts.path or "/"       # an empty path means the site root
    port = parts.port or 80        # honor an explicit port in the URL; default HTTP

    # The with-statement closes the socket even if an exception is raised.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client:
        client.setblocking(False)
        try:
            client.connect((host, port))
        except BlockingIOError:
            # Expected: the non-blocking connect is still in progress.
            pass
        request = "GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(path, host).encode("utf8")
        while True:
            try:
                # NOTE: on a non-blocking socket send() may write only part
                # of the buffer; acceptable here since the request is tiny.
                client.send(request)
                break
            except OSError:
                # Connection not established yet (errno varies by platform,
                # hence the broad OSError) -- spin and retry.
                pass
        data = b""
        while True:
            try:
                chunk = client.recv(1024)
            except BlockingIOError:
                # No data available yet -- spin and retry.
                continue
            if not chunk:  # empty read == peer closed the connection
                break
            data += chunk
    print(data.decode("utf8"))


if __name__ == "__main__":
    get_url("http://www.baidu.com")
三、使用多路复用io完成http请求
import socket
from urllib.parse import urlparse
from selectors import DefaultSelector, EVENT_READ, EVENT_WRITE
selector = DefaultSelector()
# 使用select完成http请求
class Fetcher:
    """Callback-style HTTP fetcher driven by the module-level ``selector``.

    ``get_url`` starts a non-blocking connect and registers the socket for
    EVENT_WRITE; the event loop then invokes :meth:`connected` once the
    socket is writable and :meth:`readable` on every readable event until
    the server closes the connection.
    """

    def connected(self, key):
        """Writable callback: the connect finished, so send the request."""
        # Stop watching for writability before switching over to reads.
        selector.unregister(key.fd)
        self.client.send(
            "GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(self.path, self.host).encode('utf-8'))
        selector.register(self.client.fileno(), EVENT_READ, self.readable)

    def readable(self, key):
        """Readable callback: accumulate data; on EOF print the body."""
        chunk = self.client.recv(1024)
        if not chunk:
            # Empty read: the peer closed the connection -- we are done.
            selector.unregister(key.fd)
            self.data = self.data.decode('utf-8')
            # The body starts after the first blank line of the response.
            html_data = self.data.split('\r\n\r\n')[1]
            print(html_data)
            self.client.close()
        else:
            self.data += chunk

    def get_url(self, url):
        """Start a non-blocking request for *url*.

        Uses ``selector.register(fileobj, events, data=None)`` where
        *fileobj* is the file descriptor to watch, *events* is the event
        mask, and *data* carries the callback to invoke.
        """
        url = urlparse(url)
        self.host = url.netloc
        self.path = url.path or '/'
        self.data = b""
        self.client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.client.setblocking(False)
        try:
            self.client.connect((self.host, 80))
        except BlockingIOError:
            # Expected for a non-blocking connect still in progress.
            pass
        # Fire `connected` once the socket becomes writable.
        selector.register(self.client.fileno(), EVENT_WRITE, self.connected)
def loop():
    """Event loop: poll the selector and dispatch callbacks.

    Notes:
      * select() itself has no register/callback mode; the dispatch below
        is done by our own code, not by the OS.
      * On Windows, select() raises OSError once the list of watched file
        objects becomes empty, which is how this loop terminates; on
        Linux no such error is raised.
    """
    while True:
        try:
            # Blocks until at least one registered socket is ready.
            ready = selector.select()
        except OSError:
            break
        for key, event_mask in ready:
            handler = key.data
            handler(key)
if __name__ == "__main__":
    # Start one asynchronous fetch, then run the event loop until the
    # selector raises (Windows) once nothing is left to watch.
    fetcher = Fetcher()
    fetcher.get_url("http://www.baidu.com")
    loop()
四、多路复用io与同步阻塞io性能对比
- 使用同步阻塞获取20个html页面
import socket
from urllib.parse import urlparse
def get_url(url):
    """Fetch *url* with a blocking socket and print a success marker.

    Timing-benchmark variant: the call does not return until the entire
    response has been received, so sequential calls cannot overlap.
    """
    parts = urlparse(url)
    host = parts.hostname          # netloc may contain ":port"; hostname never does
    path = parts.path or "/"       # an empty path means the site root
    port = parts.port or 80        # honor an explicit port in the URL; default HTTP

    # The with-statement closes the socket even if an exception is raised.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client:
        client.connect((host, port))
        # sendall() keeps writing until the whole request is out; plain
        # send() may transmit only a prefix of the buffer.
        client.sendall("GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(path, host).encode("utf8"))
        data = b""
        while True:
            chunk = client.recv(1024)
            if not chunk:  # empty read == peer closed the connection
                break
            data += chunk
    data = data.decode("utf8")
    print("html获取成功")
if __name__ == "__main__":
    get_url("http://www.baidu.com")
    import time
    start_time = time.time()
    # Fetch 20 pages strictly one after another; each call blocks until
    # its response is complete, so total time is the sum of all requests.
    for url in range(20):
        url = "http://shop.projectsedu.com/goods/{}/".format(url)
        get_url(url)
    print("last in {}".format(time.time() - start_time))
"""
html获取成功
html获取成功
html获取成功
html获取成功
html获取成功
html获取成功
html获取成功
html获取成功
html获取成功
html获取成功
html获取成功
html获取成功
html获取成功
html获取成功
html获取成功
html获取成功
html获取成功
html获取成功
html获取成功
html获取成功
html获取成功
last in 18.078646898269653
Process finished with exit code 0
"""
- 使用多路复用io获取20个html页面
# 通过非阻塞io实现http请求
import socket
from urllib.parse import urlparse
from selectors import DefaultSelector, EVENT_READ, EVENT_WRITE
selector = DefaultSelector()
# 使用select完成http请求
class Fetcher:
    """Callback-style HTTP fetcher driven by the module-level ``selector``.

    Timing-benchmark variant: prints a success marker instead of the page
    body. ``get_url`` starts a non-blocking connect and registers for
    EVENT_WRITE; the event loop calls :meth:`connected` once writable and
    :meth:`readable` on each readable event until the peer closes.
    """

    def connected(self, key):
        """Writable callback: the connect finished, so send the request."""
        # Stop watching for writability before switching over to reads.
        selector.unregister(key.fd)
        self.client.send(
            "GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(self.path, self.host).encode('utf-8'))
        selector.register(self.client.fileno(), EVENT_READ, self.readable)

    def readable(self, key):
        """Readable callback: accumulate data; on EOF report success."""
        chunk = self.client.recv(1024)
        if not chunk:
            # Empty read: the peer closed the connection -- we are done.
            selector.unregister(key.fd)
            self.data = self.data.decode('utf-8')
            # The body starts after the first blank line of the response.
            html_data = self.data.split('\r\n\r\n')[1]
            print("获取成功")
            self.client.close()
        else:
            self.data += chunk

    def get_url(self, url):
        """Start a non-blocking request for *url*.

        Uses ``selector.register(fileobj, events, data=None)`` where
        *fileobj* is the file descriptor to watch, *events* is the event
        mask, and *data* carries the callback to invoke.
        """
        url = urlparse(url)
        self.host = url.netloc
        self.path = url.path or '/'
        self.data = b""
        self.client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.client.setblocking(False)
        try:
            self.client.connect((self.host, 80))
        except BlockingIOError:
            # Expected for a non-blocking connect still in progress.
            pass
        # Fire `connected` once the socket becomes writable.
        selector.register(self.client.fileno(), EVENT_WRITE, self.connected)
def loop():
    """Event loop: poll the selector and dispatch callbacks.

    Notes:
      * select() itself has no register/callback mode; the dispatch below
        is done by our own code, not by the OS.
      * On Windows, select() raises OSError once the list of watched file
        objects becomes empty, which is how this loop terminates; on
        Linux no such error is raised.
    """
    while True:
        try:
            # Blocks until at least one registered socket is ready.
            ready = selector.select()
        except OSError:
            break
        for key, event_mask in ready:
            handler = key.data
            handler(key)
if __name__ == '__main__':
    import time
    start_time = time.time()
    # Start all 20 non-blocking requests up front; none of them blocks...
    for url in range(20):
        url = "http://shop.projectsedu.com/goods/{}/".format(url)
        fetcher = Fetcher()
        fetcher.get_url(url)
    # ...then a single event loop drives every connection concurrently.
    loop()
    print("last in {}".format(time.time() - start_time))
"""
运行结果:
获取成功
获取成功
获取成功
获取成功
获取成功
获取成功
获取成功
获取成功
获取成功
获取成功
获取成功
获取成功
获取成功
获取成功
获取成功
获取成功
获取成功
获取成功
获取成功
获取成功
last in 2.192136287689209
Process finished with exit code 0
"""
[参考视频](https://www.bilibili.com/video/av64192449/?p=76)
[参考博客](https://www.cnblogs.com/zydeboke/p/11328584.html)
[参考博客](https://www.cnblogs.com/zzzlw/p/9384308.html#top)