效率对比
异步爬虫
# 协程方式 爬取非小号数据
# https://www.feixiaohao.com/list_1.html
import time
import json
import asyncio
import csv
import aiohttp
# Reference timestamp; the elapsed time printed at the end is measured from here.
start_time = time.time()

# CSV output shared by every coroutine. newline='' leaves line-ending handling
# to the csv module.
CSV_HEADER = ['current_price', 'fullname', 'name', 'code', 'url']
_csv_file = open('非小号.csv', mode='w', encoding='utf-8', newline='')
writer = csv.writer(_csv_file)
writer.writerow(CSV_HEADER)

# Tasks scheduled on the event loop; run() appends to this list.
tasks = list()

# Request headers sent with every API call.
USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3722.400 QQBrowser/10.5.3751.400'
headers = {'User-Agent': USER_AGENT}
async def main(url):
    """Fetch one page of the coin-ranking API and append its rows to the CSV.

    Each entry of the payload's ``data`` list is printed and written through
    the module-level ``writer`` as
    ``[current_price, fullname, name, code, detail_url]``.

    Args:
        url: Fully formatted API URL for a single result page.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP
            error status (4xx/5xx).
        json.JSONDecodeError: If the response body is not valid JSON.
        KeyError: If the payload has no ``data`` entry, or an entry lacks
            one of the expected fields.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as response:
            # Surface HTTP errors (e.g. being rate-limited or blocked) as such
            # instead of letting an error page fail later inside json.loads.
            response.raise_for_status()
            body = await response.text()

    payload = json.loads(body)
    for data in payload['data']:
        current_price = data['current_price']
        fullname = data['fullname']
        name = data['name']
        code = data['code']
        print('价格/¥:', current_price)
        print('币名:', fullname)
        print()
        # Kept separate from the ``url`` parameter so the request URL is not
        # overwritten while iterating.
        detail_url = 'https://www.feixiaohao.com/currencies/{}/'.format(code)
        writer.writerow([current_price, fullname, name, code, detail_url])
def run():
    """Schedule one ``main`` task per API page, for pages 1 through 75.

    Uses the module-level ``loop`` to create the tasks, so the loop must
    exist before this is called. Every created task is appended to the
    module-level ``tasks`` list in page order.
    """
    url_template = 'https://dncapi.bqrank.net/api/coin/web-coinrank?page={}&type=-1&pagesize=100&webp=1'
    for page in range(1, 76):
        page_url = url_template.format(page)
        tasks.append(loop.create_task(main(page_url)))
if __name__ == '__main__':
    # Create the loop explicitly: calling asyncio.get_event_loop() when no
    # loop is running is deprecated and newer Python releases no longer
    # create one implicitly. ``loop`` stays a module-level name because
    # run() uses it to create the tasks.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    run()
    # Block until every scheduled page task has finished.
    loop.run_until_complete(asyncio.wait(tasks))
    print('耗时:', time.time() - start_time)
#耗时: 1.7821018695831299
#耗时: 1.428081750869751
普通爬虫
# 爬取非小号数据
# https://www.feixiaohao.com/list_1.html
import time
import json
import csv
import requests
# Reference timestamp; the elapsed time printed at the end is measured from here.
start_time = time.time()

# CSV output for the synchronous crawler. newline='' leaves line-ending
# handling to the csv module.
CSV_HEADER = ['current_price', 'fullname', 'name', 'code', 'url']
_csv_file = open('非小号2.csv', mode='w', encoding='utf-8', newline='')
writer = csv.writer(_csv_file)
writer.writerow(CSV_HEADER)

# Request headers sent with every API call.
USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3722.400 QQBrowser/10.5.3751.400'
headers = {'User-Agent': USER_AGENT}
def main():
    """Fetch API pages 1 through 75 one after another and write rows to the CSV.

    Each entry of a page's ``data`` list is printed and written through the
    module-level ``writer`` as
    ``[current_price, fullname, name, code, detail_url]``.

    Raises:
        requests.RequestException: On connection problems, a timeout, or an
            HTTP error status (4xx/5xx).
        json.JSONDecodeError: If a response body is not valid JSON.
        KeyError: If a payload has no ``data`` entry, or an entry lacks one
            of the expected fields.
    """
    for page in range(1, 76):
        page_url = 'https://dncapi.bqrank.net/api/coin/web-coinrank?page={}&type=-1&pagesize=100&webp=1'.format(page)
        # requests has no default timeout; without one a stalled connection
        # would block the whole crawl indefinitely.
        response = requests.get(page_url, headers=headers, timeout=10)
        # Surface HTTP errors (e.g. being rate-limited or blocked) as such
        # instead of letting an error page fail later inside json.loads.
        response.raise_for_status()
        payload = json.loads(response.text)
        for data in payload['data']:
            current_price = data['current_price']
            fullname = data['fullname']
            name = data['name']
            code = data['code']
            print('价格/¥:', current_price)
            print('币名:', fullname)
            print()
            detail_url = 'https://www.feixiaohao.com/currencies/{}/'.format(code)
            writer.writerow([current_price, fullname, name, code, detail_url])
if __name__ == '__main__':
    # Run the sequential crawl, then report the wall-clock time elapsed
    # since start_time was recorded.
    main()
    elapsed = time.time() - start_time
    print('耗时:', elapsed)
#耗时: 21.724242448806763
#耗时: 21.267216205596924
文件对比
异步
同步
本来想多试几次来测试耗时，但请求次数多了之后被禁止访问了。从上面的结果看，协程方式明显更快（约 1.4～1.8 秒，同步方式约 21 秒）；不过各页请求是并发完成的，所以写入文件的顺序不固定。感兴趣可以自己试试。