搭建本地服务器
# coding=utf-8
from http.server import BaseHTTPRequestHandler, HTTPServer
from socketserver import ThreadingMixIn
import time
ENCODING = 'utf-8'
class ThreadingHTTPServer(ThreadingMixIn, HTTPServer):
    """HTTP server that handles each incoming request in its own thread.

    The class adds nothing of its own: ``ThreadingMixIn`` supplies the
    per-request threading and ``HTTPServer`` the HTTP socket handling.
    """
class MyRequestHandler(BaseHTTPRequestHandler):
    """Minimal request handler that only supports GET.

    The request path is interpreted as a delay in seconds (``/2.5`` means
    2.5 seconds). The handler sleeps for that long and then answers with a
    short plain-text message stating how long it waited.
    """

    def do_GET(self):  # pylint: disable=invalid-name
        """Sleep for the number of seconds given in the path, then reply.

        A path that is not a number, is negative, or is not finite
        (``/nan``, ``/inf``) is treated as a delay of 0 seconds.
        """
        try:
            seconds = float(self.path[1:])
        except ValueError:
            seconds = 0.0
        # float() also accepts "nan" and "inf"; neither is a usable delay
        # for time.sleep(). NaN fails every comparison, so this single
        # range check rejects NaN, infinity and negative values at once.
        if not 0.0 <= seconds < float('inf'):
            seconds = 0.0
        text = "Waited for {:4.2f} seconds.\nThat's all.\n"
        msg = text.format(seconds).encode(ENCODING)
        time.sleep(seconds)
        self.send_response(200)
        # Advertise the same charset the body was actually encoded with.
        self.send_header("Content-type",
                         'text/plain; charset={}'.format(ENCODING))
        self.send_header("Content-length", str(len(msg)))
        self.end_headers()
        self.wfile.write(msg)
def run(server_class=ThreadingHTTPServer,
        handler_class=MyRequestHandler,
        port=8000):
    """Run the simple server on the given port until it is interrupted.

    Args:
        server_class: Server type; called as
            ``server_class((host, port), handler_class)``.
        handler_class: Request handler type passed to the server.
        port: TCP port to listen on, bound on all interfaces.

    Whatever stops ``serve_forever()`` (for example ``KeyboardInterrupt``)
    still propagates to the caller, but the listening socket is released
    first.
    """
    server_address = ('', port)
    httpd = server_class(server_address, handler_class)
    print('Serving from port {} ...'.format(port))
    try:
        httpd.serve_forever()
    finally:
        # Release the listening socket even when serving stops via an
        # exception, so the port is not left bound.
        httpd.server_close()
# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    run()
1.urlopen同步访问
# coding=utf-8
from urllib.request import urlopen
import time
ENCODING = 'ISO-8859-1'
def get_encoding(http_response):
    """Return the charset announced in the response's Content-type header.

    Args:
        http_response: Object with a ``getheader(name)`` method, such as the
            response returned by ``urlopen``.

    Returns:
        The charset value as sent by the server (surrounding quotes
        removed), or the module default ``ENCODING`` when the header is
        missing, has no ``charset`` parameter, or the parameter is empty.
    """
    # getheader() yields None when the header is absent.
    content_type = http_response.getheader('Content-type') or ''
    for entry in content_type.split(';'):
        name, separator, value = entry.partition('=')
        # Parameter names are case-insensitive; a bare "charset" without
        # "=value" carries no information and is skipped.
        if separator and name.strip().lower() == 'charset':
            charset = value.strip().strip('"\'')
            if charset:
                return charset
    return ENCODING
def get_page(host, port, wait=0):
    """Fetch ``host:port/wait`` synchronously and return the decoded body.

    Args:
        host: Scheme and host name, e.g. ``'http://localhost'``.
        port: Port of the server.
        wait: Value placed in the URL path.

    Returns:
        The response body as text, decoded with the charset reported by
        ``get_encoding`` for this response.
    """
    target = '{}:{}/{}'.format(host, port, wait)
    with urlopen(target) as response:
        raw_body = response.read()
        charset = get_encoding(response)
    return raw_body.decode(charset)
def main(show_time=True):
    """Fetch the test pages one after another and print them.

    Args:
        show_time: When true, also print the measured run time next to the
            sum of the requested waits.
    """
    started_at = time.perf_counter()

    # Fixed test setup: one request per entry in ``waits``.
    host, port = 'http://localhost', '8000'
    waits = [1, 5, 3, 2]

    # Requests run strictly in sequence; all pages are collected first.
    pages = []
    for delay in waits:
        pages.append(get_page(host, port, delay))

    for text in pages:
        print(text)

    elapsed = time.perf_counter() - started_at
    expected = sum(waits)
    if not show_time:
        return
    template = '同步运行时间: {:4.2f}秒 ,预计运行时间: {:4.2f}秒'
    print(template.format(elapsed, expected))
# Run the synchronous demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
输出:
Waited for 1.00 seconds.
That's all.
Waited for 5.00 seconds.
That's all.
Waited for 3.00 seconds.
That's all.
Waited for 2.00 seconds.
That's all.
同步运行时间: 15.09秒 ,预计运行时间: 11.00秒
2.open_connection异步简单访问
# coding=utf-8
import asyncio
from contextlib import closing
import time
import aiohttp
ENCODING = 'ISO-8859-1'
# Parse the charset out of the raw response header lines.
def get_encoding(header):
    """Return the charset announced by a Content-type line in ``header``.

    Args:
        header: Iterable of decoded response header lines
            (``'Name: value'`` strings).

    Returns:
        The charset value as sent by the server (surrounding quotes
        removed), or the module default ``ENCODING`` when no Content-type
        line carries a usable ``charset`` parameter.
    """
    for line in header:
        name, colon, value = line.partition(':')
        # Header names are case-insensitive, so "Content-Type" and
        # "Content-type" must both match.
        if not colon or name.strip().lower() != 'content-type':
            continue
        for entry in value.split(';'):
            key, equals, charset = entry.partition('=')
            # A bare "charset" without "=value" is skipped, not indexed.
            if equals and key.strip().lower() == 'charset':
                charset = charset.strip().strip('"\'')
                if charset:
                    return charset
    return ENCODING
# 本地搭建异步获取
async def get_page(host, wait=0, port=8000):
    """Fetch ``/<wait>`` from ``host:port`` over a raw asyncio connection.

    Sends a minimal HTTP/1.0 GET request, reads the header lines up to the
    first blank line, then decodes the body with the charset reported by
    ``get_encoding`` for those headers.

    Args:
        host: Host name to connect to (no scheme).
        wait: Value placed in the request path.
        port: TCP port of the server.

    Returns:
        The body lines, each stripped of surrounding whitespace, joined
        with ``'\\n'``.
    """
    reader, writer = await asyncio.open_connection(host, port)
    try:
        request = b'\r\n'.join([
            'GET /{} HTTP/1.0'.format(wait).encode(ENCODING),
            b'Host: %b' % host.encode(ENCODING),
            b'Connection: close',
            b'', b''
        ])
        writer.write(request)
        # Let the transport flush the request before waiting on the reply.
        await writer.drain()

        # Header section: everything up to the first empty line.
        header = []
        async for raw_line in reader:
            line = raw_line.decode(ENCODING).strip()
            if not line:
                break
            header.append(line)

        encoding = get_encoding(header)

        # Body: the remaining lines, decoded with the announced charset.
        msg_lines = []
        async for raw_line in reader:
            msg_lines.append(raw_line.decode(encoding).strip())
    finally:
        # Close the connection even if reading or decoding failed.
        writer.close()
    return '\n'.join(msg_lines)
async def _fetch_all(host, waits):
    """Fetch one page per wait value concurrently, in the order of ``waits``."""
    # gather() is awaited inside a coroutine so that the tasks are created
    # on the loop that is actually running.
    return await asyncio.gather(*(get_page(host, wait) for wait in waits))


def main(show_time=True):
    """Fetch the test pages concurrently and print them.

    Args:
        show_time: When true, also print the measured run time next to the
            sum of the requested waits.
    """
    start = time.perf_counter()
    # Fixed test setup: one request per entry in ``waits``.
    waits = [1, 5, 3, 2]
    host = 'localhost'
    # Create the loop explicitly: relying on get_event_loop() to create one
    # implicitly is deprecated and no longer works on newer Python versions.
    with closing(asyncio.new_event_loop()) as loop:
        pages = loop.run_until_complete(_fetch_all(host, waits))
    for page in pages:
        print(page)
    duration = time.perf_counter() - start
    sum_waits = sum(waits)
    if show_time:
        msg = '异步运行时间: {:4.2f}秒 ,预计运行时间: {:4.2f}秒'
        print(msg.format(duration, sum_waits))
# Run the asyncio demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
输出:
Waited for 1.00 seconds.
That's all.
Waited for 5.00 seconds.
That's all.
Waited for 3.00 seconds.
That's all.
Waited for 2.00 seconds.
That's all.
异步运行时间: 6.03秒 ,预计运行时间: 11.00秒
3.aiohttp并发测试速度
# coding=utf-8
import asyncio
import time
import aiohttp
from contextlib import closing
# 直接获取源码
async def fetch_page(session, url, wait=0):
    """Fetch ``url`` through ``session`` and return the response text.

    Args:
        session: Client session whose ``get(url)`` returns an async context
            manager yielding the response.
        url: Address to request.
        wait: Unused; kept so existing callers that pass it keep working.

    Returns:
        The decoded response body.

    Raises:
        AssertionError: If the response status is not 200.
    """
    async with session.get(url) as response:
        # Raise explicitly instead of using ``assert`` so the check is not
        # stripped under ``python -O``; the exception type is unchanged.
        if response.status != 200:
            raise AssertionError(
                'unexpected HTTP status {} for {}'.format(response.status, url))
        return await response.text()
async def main(loop, show_time=True):
    """Fetch the test pages concurrently over one aiohttp session.

    Args:
        loop: Kept for backward compatibility with existing callers. It is
            no longer forwarded to ``aiohttp.ClientSession`` because that
            argument is deprecated; the session is created inside this
            coroutine and so uses the loop that is running it.
        show_time: When true, also print the measured run time next to the
            sum of the requested waits.
    """
    start = time.perf_counter()
    # Fixed test setup: one request per entry in ``waits``.
    port = 8000
    waits = [1, 5, 3, 2]
    host = 'http://localhost'
    # All requests share a single session.
    async with aiohttp.ClientSession() as session:
        tasks = [
            fetch_page(session, '{}:{}/{}'.format(host, port, wait))
            for wait in waits
        ]
        # Run the requests concurrently; results keep the order of waits.
        pages = await asyncio.gather(*tasks)
        for page in pages:
            print(page)
    duration = time.perf_counter() - start
    sum_waits = sum(waits)
    if show_time:
        msg = '异步运行时间: {:4.2f}秒 ,预计运行时间: {:4.2f}秒'
        print(msg.format(duration, sum_waits))
if __name__ == "__main__":
    # Create the loop explicitly: relying on get_event_loop() to create one
    # implicitly is deprecated and no longer works on newer Python versions.
    # closing() shuts the loop down once main() has finished.
    with closing(asyncio.new_event_loop()) as loop:
        loop.run_until_complete(main(loop))
输出:
Waited for 1.00 seconds.
That's all.
Waited for 5.00 seconds.
That's all.
Waited for 3.00 seconds.
That's all.
Waited for 2.00 seconds.
That's all.
异步运行时间: 5.03秒 ,预计运行时间: 11.00秒