用python写了个全国疫情中高风险地区查询

kylner

已于 2022-12-02 08:18:40 修改

阅读量4.5k

点赞数 4

分类专栏： Python 文章标签： python 开发语言

于 2022-06-16 09:24:51 首次发布

本文链接：https://blog.csdn.net/kylner/article/details/125309260

版权

Python 专栏收录该内容

8 篇文章 1 订阅

订阅专栏

最近用 Python 写了一段查询全国疫情中高风险地区的爬虫代码，分享出来和大家交流，希望能得到不同思路的指教，让代码更简洁、运行效率更高。

总体思路

1、找到可供查询的源网站
2、分析、获取查询的API
3、构造API
4、获取全国中高风险地区信息
5、对信息按“省、市、街道”进行整理
6、构造查询匹配语句，判断输入的“省、市、县”是否在获取的信息中

注：输入匹配部分的代码自己总觉得不是最优解，希望能有大佬看到，指点一二。

代码部分

# -*- coding: utf-8 -*-
# @Time    : 2022/5/12 11:08
# @Author  : Kyln.Wu
# @Email   : kylnwu@qq.com
# @File    : 疫情风险地区查询.py
# @Software: PyCharm
import hashlib
import json
import difflib
import requests
import time



# Current Unix time in whole seconds, as a string. It is computed once when the
# module is loaded and reused by both signature functions and by the request
# header/body built in get_datas().
timestamp = str(int((time.time())))

# Constants used when building the request signature and body.
# The full token could not be published in the code itself, so the author left
# it in a comment instead: 23y0ufFl5YxIyGrI8hWRUZmKkvtSjLQA
# NOTE(review): presumably the masked placeholder below has to be replaced with
# that value before the signature is accepted -- confirm.
token = '*********************'
nonce = '123456789abcdefg'
passid = 'zdww'
key = "3C502C97ABDA40D0A60FBEE50FAAD1DA"


def get_zdwwsignature():
    """Build the value sent in the ``x-wif-signature`` request header.

    The module-level ``timestamp`` is placed before and after two hard-coded
    strings (the second one equals the ``x-wif-nonce`` header value), the
    result is hashed with SHA-256, and the hex digest is upper-cased.

    Returns:
        str: Upper-case hexadecimal SHA-256 digest (64 characters).
    """
    pieces = (
        timestamp,
        'fTN2pfuisxTavbTuYVSsNJHetwq5bJvC',
        'QkjjtiLM2dCratiA',
        timestamp,
    )
    raw = ''.join(pieces).encode('utf-8')
    return hashlib.sha256(raw).hexdigest().upper()


def get_signatureheader():
    """Build the ``signatureHeader`` value sent in the JSON request body.

    Concatenates the module-level ``timestamp``, ``token``, ``nonce`` and
    ``timestamp`` again, hashes the UTF-8 bytes with SHA-256, and returns the
    hex digest in upper case.

    Returns:
        str: Upper-case hexadecimal SHA-256 digest (64 characters).
    """
    raw = ''.join((timestamp, token, nonce, timestamp)).encode('utf-8')
    return hashlib.sha256(raw).hexdigest().upper()


def get_datas(timeout=10):
    """Fetch the nationwide risk-area data and cache it in ``./risk_data.log``.

    Posts a signed JSON request to the query interface and writes the raw
    response text to a local file, so later lookups can work offline and the
    site is not hit repeatedly (which could get the IP blocked).

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Raises:
        requests.RequestException: On connection errors, timeouts, or an HTTP
            error status. The cache file is left untouched in that case.
    """
    url = 'https://bmfw.www.gov.cn/bjww/interface/interfaceJson'
    # NOTE(review): 'br' is advertised in Accept-Encoding; confirm the installed
    # HTTP stack can decode Brotli responses.
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Type": "application/json; charset=UTF-8",
        "Host": "bmfw.www.gov.cn",
        "Origin": "http://bmfw.www.gov.cn",
        "Referer": "http://bmfw.www.gov.cn/yqfxdjcx/risk.html",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0",
        "x-wif-nonce": "QkjjtiLM2dCratiA",
        "x-wif-paasid": "smt-application",
        "x-wif-signature": get_zdwwsignature(),
        "x-wif-timestamp": timestamp,
    }

    # Use the module constants so the nonce that is sent is always the same one
    # get_signatureheader() signed.
    payload = {
        'appId': "NcApplication",
        'paasHeader': passid,
        'timestampHeader': timestamp,
        'nonceHeader': nonce,
        'signatureHeader': get_signatureheader(),
        'key': key,
    }

    resp = requests.post(url, headers=headers, json=payload, timeout=timeout)
    # Fail loudly instead of overwriting the cache with an error page.
    resp.raise_for_status()

    with open('./risk_data.log', 'w', encoding='utf-8') as f:
        f.write(resp.text)


def get_highlist(data):
    """Return the high-risk area records found at ``data['data']['highlist']``.

    Args:
        data: Parsed response dictionary.

    Raises:
        KeyError: If ``'data'`` or ``'highlist'`` is missing.
    """
    return data['data']['highlist']


def get_middlelist(data):
    """Return the medium-risk area records found at ``data['data']['middlelist']``.

    Args:
        data: Parsed response dictionary.

    Raises:
        KeyError: If ``'data'`` or ``'middlelist'`` is missing.
    """
    return data['data']['middlelist']


# Ordering of the risk tiers, used to check that province >= city >= county.
_RISK_RANK = {'高': 2, '中': 1, '低': 0}


def _distinct(records, field):
    """Return the distinct values of *field* across *records*, sorted for stable output."""
    return sorted({record[field] for record in records})


def _risk_tier(name, high_names, middle_names):
    """Return '高', '中' or '低' for *name*; a high-risk match wins over a medium one."""
    if difflib.get_close_matches(name, high_names, 1, cutoff=0.6):
        return '高'
    if difflib.get_close_matches(name, middle_names, 1, cutoff=0.6):
        return '中'
    return '低'


def _describe(province_in, city_in, county_in, tiers):
    """Return the result message for one query given its (province, city, county) tiers."""
    province_tier, city_tier, county_tier = tiers
    # A narrower region cannot be riskier than the region that contains it;
    # if it is, the three inputs do not describe one consistent place.
    if not (_RISK_RANK[province_tier] >= _RISK_RANK[city_tier] >= _RISK_RANK[county_tier]):
        return '不在中高风险列表中，或行政区域不匹配，请检查！！'
    if county_tier == '高':
        return f'{province_in}，{city_in}，{county_in} 为高风险省/直辖市，市/区，县/街道！！'
    if province_tier == '低':
        return f'{province_in}，{city_in}，{county_in} 为低风险省/直辖市，市/区，县/街道。'
    return (
        f'{province_in} 为{province_tier}风险省/直辖市，'
        f'{city_in} 为{city_tier}风险市/区，'
        f'{county_in} 为{county_tier}风险县/街道。'
    )


def _ask_continue():
    """Ask whether to run another query; re-prompt until the answer is an integer."""
    while True:
        answer = input('是否继续查询？1-继续，0-退出。').strip()
        try:
            return int(answer) != 0
        except ValueError:
            print('请输入 1 或 0。')


def chaxun(high_list, middle_list):
    """Interactively look up the risk level of a province / city / county.

    Prints the distinct provinces, cities and counties found in both lists,
    then repeatedly reads three names from standard input and prints their
    risk classification. Names are matched fuzzily with
    ``difflib.get_close_matches`` (cutoff 0.6). Each field gets the highest
    tier it matches: high, then medium, otherwise low.

    The session ends when any field is left empty or the user answers ``0``
    to the continue prompt.

    NOTE: the three fields are still matched independently of each other, so
    a city from one province combined with a county from another is not
    detected as a mismatch unless the tiers are inconsistent.

    Args:
        high_list: Records with ``'province'``, ``'city'`` and ``'county'``
            keys describing high-risk areas.
        middle_list: Same structure for medium-risk areas.
    """
    high_provinces = _distinct(high_list, 'province')
    high_citys = _distinct(high_list, 'city')
    high_countys = _distinct(high_list, 'county')
    print(f'高风险省/直辖市：{high_provinces}')
    print(f'高风险市/区：{high_citys}')
    print(f'高风险县/街道：{high_countys}')
    middle_provinces = _distinct(middle_list, 'province')
    middle_citys = _distinct(middle_list, 'city')
    middle_countys = _distinct(middle_list, 'county')
    print(f'中风险省/直辖市：{middle_provinces}')
    print(f'中风险市/区：{middle_citys}')
    print(f'中风险县/街道：{middle_countys}')

    while True:
        province_in = input('请输入来自省/直辖市：').strip()
        if not province_in:
            print('输入省/直辖市不能为空！')
            break
        city_in = input('请输入来自市/区：').strip()
        if not city_in:
            print('输入市/区不能为空！')
            break
        county_in = input('请输入来自县/街道：').strip()
        if not county_in:
            print('输入县/街道不能为空！')
            break

        tiers = (
            _risk_tier(province_in, high_provinces, middle_provinces),
            _risk_tier(city_in, high_citys, middle_citys),
            _risk_tier(county_in, high_countys, middle_countys),
        )
        print(_describe(province_in, city_in, county_in, tiers))

        if not _ask_continue():
            break


if __name__ == '__main__':
    import os

    cache_path = './risk_data.log'

    # Download only when there is no local cache yet, so a first run works
    # without editing the source and later runs stay offline (fewer requests,
    # less chance of the IP being blocked). Delete the file to refresh the data.
    if not os.path.exists(cache_path):
        get_datas()

    # Read the cached response; the file is closed before the interactive
    # session starts rather than being held open for its whole duration.
    with open(cache_path, 'r', encoding='utf-8') as f:
        datas_dic = json.load(f)

    high_lst = get_highlist(datas_dic)
    middle_lst = get_middlelist(datas_dic)
    chaxun(high_lst, middle_lst)

运行结果

全国中高风险地区查询结果
以上是运行结果，输入信息支持模糊查询

kylner

关注

4
点赞
踩
20

收藏

觉得还不错? 一键收藏
20
评论
用python写了个全国疫情中高风险地区查询

用 Python 写了个全国疫情中高风险地区查询，分享给大家一起交流，希望得到不同思路的指教，让代码更简洁，运行效率更高。总体思路：1、找到可供查询的源网站；2、分析、获取查询的API；3、构造API；4、获取全国中高风险地区信息；5、对信息按“省、市、街道”进行整理；6、构造查询匹配语句，判断输入的“省、市、县”是否在获取的信息中。注：输入匹配部分的代码自己总觉得不是最优解，希望能有大佬看到，指点一二。......
复制链接

扫一扫