用 Python 写了个全国疫情中高风险地区查询工具

最近用 Python 写了一个查询全国疫情中高风险地区的爬虫,把代码分享给大家一起交流,希望能得到不同思路的指教,让代码更简洁、运行效率更高。

总体思路

1、找到可供查询的源网站
2、分析、获取查询的API
3、构造API
4、获取全国中高风险地区信息
5、对信息按“省、市、县/街道”进行整理
6、构造查询匹配语句,判断输入的“省、市、县/街道”是否在获取的信息中

注:输入匹配部分的代码自己总觉得不是最优解,希望能有大佬看到,指点一二。

代码部分

# -*- coding: utf-8 -*-
# @Time    : 2022/5/12 11:08
# @Author  : Kyln.Wu
# @Email   : kylnwu@qq.com
# @File    : 疫情风险地区查询.py
# @Software: PyCharm
import hashlib
import json
import difflib
import requests
import time



# Current Unix time in whole seconds, captured once when the module is loaded
# and kept as a string so it can be concatenated into the signature inputs.
timestamp = str(int(time.time()))

# Constants needed to rebuild the request signatures.
# The blog platform does not allow publishing the full secret, so the literal
# below is masked; the real value is: 23y0ufFl5YxIyGrI8hWRUZmKkvtSjLQA
token = '*********************'
nonce = '123456789abcdefg'
passid = 'zdww'
key = '3C502C97ABDA40D0A60FBEE50FAAD1DA'


# Rebuild in Python the value that get_datas() sends as the x-wif-signature header
def get_zdwwsignature():
    """Return the upper-case SHA-256 hex digest used as the header signature.

    The hashed text is the module-level ``timestamp``, followed by two fixed
    string literals, followed by ``timestamp`` again, encoded as UTF-8.
    """
    payload = ''.join(
        (timestamp, 'fTN2pfuisxTavbTuYVSsNJHetwq5bJvC', 'QkjjtiLM2dCratiA', timestamp)
    )
    return hashlib.sha256(payload.encode('utf-8')).hexdigest().upper()


# Rebuild in Python the value that get_datas() sends as 'signatureHeader' in the request body
def get_signatureheader():
    """Return the upper-case SHA-256 hex digest used as the body signature.

    The hashed text is the module-level ``timestamp``, ``token`` and ``nonce``
    followed by ``timestamp`` again, encoded as UTF-8.
    """
    raw = f'{timestamp}{token}{nonce}{timestamp}'
    digest = hashlib.sha256(raw.encode('utf-8'))
    return digest.hexdigest().upper()


# Main fetch routine: call the API with the signed parameters and cache the nationwide data locally
def get_datas():
    """Download the nationwide risk-area data and save it to ``./risk_data.log``.

    The request carries two signatures derived from the module-level
    ``timestamp``: one in the ``x-wif-signature`` header and one in the
    ``signatureHeader`` field of the JSON body. The raw response text is
    written to disk so that later queries can run offline.

    Raises:
        requests.RequestException: if the request times out, the connection
            fails, or the server answers with an HTTP error status.
    """
    url = 'https://bmfw.www.gov.cn/bjww/interface/interfaceJson'
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        # "br" is deliberately not advertised: requests can only decode Brotli
        # when an optional brotli package is installed, and an undecoded body
        # would be cached as garbage.
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Type": "application/json; charset=UTF-8",
        "Host": "bmfw.www.gov.cn",
        "Origin": "http://bmfw.www.gov.cn",
        "Referer": "http://bmfw.www.gov.cn/yqfxdjcx/risk.html",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0",
        "x-wif-nonce": "QkjjtiLM2dCratiA",
        "x-wif-paasid": "smt-application",
        "x-wif-signature": get_zdwwsignature(),
        "x-wif-timestamp": timestamp,
    }

    # Reuse the module-level constants so the values sent here can never drift
    # from the ones get_signatureheader() hashes.
    params = {
        'appId': "NcApplication",
        'paasHeader': passid,
        'timestampHeader': timestamp,
        'nonceHeader': nonce,
        'signatureHeader': get_signatureheader(),
        'key': key,
    }

    # Without a timeout requests waits indefinitely on a stalled connection.
    resp = requests.post(url, headers=headers, json=params, timeout=10)
    # Fail loudly instead of caching an HTTP error page that would only
    # surface later as a confusing JSON decoding error.
    resp.raise_for_status()

    # Persist the response locally so later lookups work offline; this keeps
    # online requests to a minimum and lowers the risk of an IP ban.
    with open('./risk_data.log', 'w', encoding='utf-8') as f:
        f.write(resp.text)


# Pull the 'highlist' section (high-risk area records) out of the parsed response
def get_highlist(data):
    """Return ``data['data']['highlist']``.

    Raises:
        KeyError: if either key is missing from ``data``.
    """
    return data['data']['highlist']


# Pull the 'middlelist' section (middle-risk area records) out of the parsed response
def get_middlelist(data):
    """Return ``data['data']['middlelist']``.

    Raises:
        KeyError: if either key is missing from ``data``.
    """
    return data['data']['middlelist']


# Interactive lookup of high/middle-risk areas, plus its private helpers.

# Numeric risk levels (higher means riskier) and the label printed for each.
_RISK_HIGH, _RISK_MIDDLE, _RISK_LOW = 2, 1, 0
_RISK_LABELS = {_RISK_HIGH: '高', _RISK_MIDDLE: '中', _RISK_LOW: '低'}


def _distinct_values(records, field):
    """Return the distinct values of ``field`` across ``records`` as a list."""
    return list({record[field] for record in records})


def _risk_level(name, high_names, middle_names):
    """Return the risk level of ``name``; a fuzzy hit in the high list wins over the middle list."""
    if difflib.get_close_matches(name, high_names, 1, cutoff=0.6):
        return _RISK_HIGH
    if difflib.get_close_matches(name, middle_names, 1, cutoff=0.6):
        return _RISK_MIDDLE
    return _RISK_LOW


def _ask_continue():
    """Ask whether to run another query; re-prompt until the answer is an integer."""
    while True:
        try:
            return int(input('是否继续查询?1-继续,0-退出。')) != 0
        except ValueError:
            # A non-numeric answer used to crash the program; ask again instead.
            continue


def chaxun(high_list, middle_list):
    """Interactively report the risk level of a province / city / county.

    Prints the distinct province, city and county names found in each list,
    then repeatedly prompts for the three names and prints a verdict. Each
    name is fuzzy-matched (``difflib.get_close_matches``, cutoff 0.6) against
    the high-risk names first and the middle-risk names second; a name found
    in neither is treated as low risk.

    The loop ends when any of the three inputs is empty or when the user
    answers 0 to the "continue?" prompt.

    Args:
        high_list: records with 'province', 'city' and 'county' keys.
        middle_list: records with the same keys.

    NOTE: the three names are matched independently of each other, not against
    one and the same record, so a city that belongs to a different province
    can still be reported as matching.
    """
    high_provinces = _distinct_values(high_list, 'province')
    high_citys = _distinct_values(high_list, 'city')
    high_countys = _distinct_values(high_list, 'county')
    print(f'高风险省/直辖市:{high_provinces}')
    print(f'高风险市/区:{high_citys}')
    print(f'高风险县/街道:{high_countys}')
    middle_provinces = _distinct_values(middle_list, 'province')
    middle_citys = _distinct_values(middle_list, 'city')
    middle_countys = _distinct_values(middle_list, 'county')
    print(f'中风险省/直辖市:{middle_provinces}')
    print(f'中风险市/区:{middle_citys}')
    print(f'中风险县/街道:{middle_countys}')

    while True:
        province_in = input('请输入来自省/直辖市:')
        if not province_in:
            print('输入省/直辖市不能为空!')
            break
        city_in = input('请输入来自市/区:')
        if not city_in:
            print('输入市/区不能为空!')
            break
        county_in = input('请输入来自县/街道:')
        if not county_in:
            print('输入县/街道不能为空!')
            break

        province_level = _risk_level(province_in, high_provinces, middle_provinces)
        city_level = _risk_level(city_in, high_citys, middle_citys)
        county_level = _risk_level(county_in, high_countys, middle_countys)

        # Accepted combinations are those where the level never rises going
        # from province to city to county: a risky county implies an at least
        # equally risky city and province. Anything else is treated as an
        # administrative-region mismatch.
        if not province_level >= city_level >= county_level:
            print('不在中高风险列表中,或行政区域不匹配,请检查!!')
        elif county_level == _RISK_HIGH:
            # Non-increasing levels ending in "high" means all three are high.
            print(f'{province_in}{city_in}{county_in} 为高风险省/直辖市,市/区,县/街道!!')
        elif province_level == _RISK_LOW:
            # Non-increasing levels starting at "low" means all three are low.
            print(f'{province_in}{city_in}{county_in} 为低风险省/直辖市,市/区,县/街道。')
        else:
            print(
                f'{province_in} 为{_RISK_LABELS[province_level]}风险省/直辖市,'
                f'{city_in} 为{_RISK_LABELS[city_level]}风险市/区,'
                f'{county_in} 为{_RISK_LABELS[county_level]}风险县/街道。'
            )

        if not _ask_continue():
            break


if __name__ == '__main__':
    # Queries run against the local cache file. It is downloaded only when it
    # does not exist yet, so a first run no longer requires editing the source
    # and repeated runs still avoid hitting the server.
    cache_path = './risk_data.log'
    try:
        cache_file = open(cache_path, 'r', encoding='utf-8')
    except FileNotFoundError:
        get_datas()
        cache_file = open(cache_path, 'r', encoding='utf-8')

    # Parse and close the file before the interactive session starts, so the
    # handle is not held open while waiting for user input.
    with cache_file:
        datas_dic = json.load(cache_file)

    high_lst = get_highlist(datas_dic)
    middle_lst = get_middlelist(datas_dic)
    chaxun(high_lst, middle_lst)

运行结果

全国中高风险地区查询结果
以上是运行结果,输入信息支持模糊查询

  • 4
    点赞
  • 20
    收藏
    觉得还不错? 一键收藏
  • 20
    评论
评论 20
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值