class HttpRequest:
    """Pair a socket with the host it targets and the response callback.

    Instances expose ``fileno()``, so they can be handed to
    ``select.select`` in place of the raw socket while still carrying the
    host name and callback along with it.
    """

    def __init__(self, sk, host, callback):
        """Store the socket ``sk``, the target ``host`` and ``callback``."""
        self.socket, self.host, self.callback = sk, host, callback

    def fileno(self):
        """Return the file descriptor of the wrapped socket."""
        descriptor = self.socket.fileno()
        return descriptor
class HttpResponse:
    """Parsed view of the raw bytes of an HTTP response.

    Attributes:
        recv_data: The raw bytes exactly as passed to the constructor.
        header_dict: Maps each header name to its value (both ``str``).
            Lines without a colon, such as the status line
            ``HTTP/1.0 200 OK``, are not stored.
        body: Everything after the first blank line, as ``bytes``; empty
            when the data contains no blank line.
    """

    def __init__(self, recv_data):
        """Store ``recv_data`` and parse it immediately."""
        self.recv_data = recv_data
        self.header_dict = {}
        self.body = None
        self.initialize()

    def initialize(self):
        """Split ``recv_data`` into headers and body and fill the attributes."""
        # partition() never raises when the separator is missing, so empty
        # or truncated data yields an empty body instead of a ValueError.
        # Only the first blank line counts; the body may contain more.
        head, _, body = self.recv_data.partition(b'\r\n\r\n')
        self.body = body
        for raw_line in head.split(b'\r\n'):
            # A stray non-UTF-8 byte must not abort parsing of all headers.
            line = raw_line.decode('utf-8', errors='replace')
            name, colon, value = line.partition(':')
            if not colon:
                # Some lines (e.g. the status line) carry no colon.
                continue
            # Whitespace around the value is optional padding, not content.
            self.header_dict[name] = value.strip()
class AsyncRequest:
    """Minimal ``select``-based event loop issuing HTTP GET requests.

    Requests are queued with ``add_request`` and driven by ``run``. Each
    request's callback is invoked once with an ``HttpResponse`` after the
    peer has closed the connection. The request is sent as HTTP/1.0, so
    the server is expected to close after replying (NOTE(review): a
    server that keeps the connection open would keep ``run`` waiting).
    """

    def __init__(self):
        self.conn = []        # requests still waiting for their response
        self.connection = []  # requests whose GET has not been sent yet
        self._buffers = {}    # id(request) -> bytearray received so far

    def add_request(self, host, callback):
        """Start a non-blocking connection to ``host`` on port 80.

        Args:
            host: Host name to connect to; also sent as the Host header.
            callback: Called with the ``HttpResponse`` once it is complete.

        Raises:
            OSError: If the connection attempt fails immediately (for
                example the host name cannot be resolved).
        """
        sk = socket.socket()
        sk.setblocking(False)
        try:
            sk.connect((host, 80))
        except BlockingIOError:
            # Expected on a non-blocking socket: the connection is still
            # in progress and select() reports when it is usable.
            pass
        except OSError:
            # Do not leak the descriptor when the attempt fails outright.
            sk.close()
            raise
        request = HttpRequest(sk, host, callback)
        self.conn.append(request)
        self.connection.append(request)

    def run(self):
        """Drive all queued requests until every one is finished or failed."""
        # Testing the list up front avoids calling select() with nothing
        # to watch, which is an error on some platforms.
        while self.conn:
            rlist, wlist, elist = select.select(
                self.conn, self.connection, self.conn, 0.05)
            for failed in elist:
                # Presumably a failed connect on platforms that report it
                # as an exceptional condition; treated as a failure.
                if failed in self.conn:
                    print(failed.host, '连接失败...')
                    self._discard(failed)
            for w in wlist:
                if w in self.connection:
                    self._send_request(w)
            for r in rlist:
                # A request dropped earlier in this iteration may still
                # be listed here; skip it instead of removing it twice.
                if r in self.conn:
                    self._receive(r)

    def _send_request(self, request):
        """Send the GET request once the socket is reported writable."""
        tpl = "GET / HTTP/1.0\r\nHost:%s\r\n\r\n" % (request.host,)
        try:
            request.socket.send(tpl.encode('utf-8'))
        except OSError:
            # A failed connection also shows up as writable; the error
            # surfaces here and must not take down the whole loop.
            print(request.host, '连接失败...')
            self._discard(request)
            return
        print(request.host, '连接成功...')
        self.connection.remove(request)  # sent; stop watching for writes

    def _receive(self, request):
        """Read what is available; finish the request once the peer closes."""
        buf = self._buffers.setdefault(id(request), bytearray())
        while True:
            try:
                chunk = request.socket.recv(8096)
            except BlockingIOError:
                # Drained for now; keep the partial data and wait for the
                # next readable event rather than reporting a cut-off
                # response.
                return
            except OSError:
                break  # connection error: finish with what we have
            if not chunk:
                break  # empty read means the peer closed the connection
            buf += chunk
        data = bytes(buf)
        # Release the socket before running user code so a raising
        # callback cannot leave it open or still registered.
        self._discard(request)
        if not data:
            print(request.host, '无数据返回')
            return
        request.callback(HttpResponse(data))

    def _discard(self, request):
        """Stop watching ``request`` and close its socket."""
        for watched in (self.conn, self.connection):
            if request in watched:
                watched.remove(request)
        self._buffers.pop(id(request), None)
        request.socket.close()
def f1(response):
    """Callback that prints the response's header dictionary.

    The printed label says the data is saved to a file; the function
    itself only prints.
    """
    headers = response.header_dict
    print('保存到文件', headers)
def f2(response):
    """Callback that prints the response's header dictionary.

    The printed label says the data is saved to a database; the function
    itself only prints.
    """
    headers = response.header_dict
    print('保存到数据库', headers)
# Each entry names a host to fetch and the callback that handles its
# response, so the caller chooses how each result is processed.
url_list = [
    dict(host='www.baidu.com', callback=f1),
    dict(host='cn.bing.com', callback=f2),
    dict(host='www.cnblogs.com', callback=f2),
]

# Queue every request first, then drive them all from a single event loop.
req = AsyncRequest()
for item in url_list:
    req.add_request(host=item['host'], callback=item['callback'])
req.run()