# 以北京市发展和改革委员会为例，实现自定义进度条爬虫，效果如下：
# -*- coding: utf-8 -*-
"""
# @Time : 2021/8/2 9:15
# @Author : ChenLvLei
# @Email : 2516455367@qq.com
# @FileName : test
# @Description :
"""
import re
import sys
import requests
# Request headers shared by the HTTP calls in this module; get_data() adds a
# 'suid' entry to this dict before sending its request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.77 Safari/537.36'
    ),
}
def get_keys() -> str:
    """Placeholder for the routine that obtains the search token and timestamp.

    The real implementation has been deliberately withheld by the author;
    this stub only returns an explanatory message string.

    NOTE(review): ``get_data`` unpacks the result of this function into two
    values -- an object on which it calls ``.group(1)`` and ``.group(2)``
    (presumably an ``re.Match``; confirm) and a timestamp. The placeholder
    string returned here does not satisfy that contract, so this function
    must be replaced with a real implementation before ``get_data`` can run.

    Returns:
        The placeholder message.
    """
    return '为了不必要麻烦 key 隐藏。'
def _render_progress(completed, total):
    """Return the 100-cell progress-bar text for ``completed`` of ``total`` items.

    The percentage is computed with integer arithmetic so that it is an exact
    floor. The floating-point form ``100 * (completed / total)`` can land just
    below a whole number (e.g. ``100 * (29 / 100)`` is ``28.999999999999996``)
    and would then be displayed one percent too low.
    """
    percent = 100 * completed // total
    return "[%s%s] %d%%" % ('☺' * percent, ' ' * (100 - percent), percent)


def get_data():
    """Query the search endpoint and print each result under a progress bar.

    Sends one POST request for page 2 (20 results per page) of the query
    '能源消费', then for every entry of the returned ``resultList`` writes a
    progress-bar line followed by the entry itself to standard output.

    Side effects:
        Adds a ``'suid'`` entry to the module-level ``headers`` dict.

    Raises:
        ValueError: if ``get_keys()`` does not return exactly two values.
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
        KeyError: if the JSON response lacks the expected nesting.
    """
    # get_keys() must yield (token_match, timestamp); token_match is used via
    # .group(1) / .group(2) below.
    word_token, timestamp = get_keys()

    url = 'http://fgw.beijing.gov.cn/so/ss/s'
    payload = {
        'siteCode': '1100000011',
        'tab': 'all',
        'timestamp': f'{timestamp}',
        'wordToken': f'{word_token.group(1)}',
        'page': '2',
        'pageSize': '20',
        'qt': '能源消费',
        'timeOption': '0',
        'sort': 'relevance',
        'keyPlace': '0',
        'fileType': '',
    }
    # Kept as an in-place update of the shared dict so that any other code
    # relying on the 'suid' header being present afterwards still works.
    headers.update({'suid': f'{word_token.group(2)}'})

    # A timeout prevents the script from hanging forever on a stalled server.
    response = requests.post(url, headers=headers, data=payload, timeout=10)
    # Fail with a clear HTTP error instead of an opaque JSON/KeyError later.
    response.raise_for_status()

    result_list = response.json()['data']['search']['searchResult']['resultList']
    total = len(result_list)  # loop-invariant; an empty list skips the loop
    for position, item in enumerate(result_list, start=1):
        sys.stdout.write('\r' + _render_progress(position, total) + '\n')
        sys.stdout.write(f'{item}' + '\n')
        sys.stdout.flush()
if __name__ == '__main__':
get_data()
# 学习交流请私信。