国庆节期间一直下雨,各县是不是也都在下雨?
领导问起的时候,我该说有雨还是没雨?
其实可以直接从网上获取实时天气,就像这样:
上代码:
'''
临汾17县的天气情况
'''
import requests
import re, json
import pandas as pd
def get_city_id(city='襄汾'):
    """Look up the weather.com.cn id of a city/county by name.

    Sends a JSONP search request to ``toy1.weather.com.cn`` and returns the
    first ``~``-separated field of the first result's ``ref`` entry.

    Args:
        city: Name to search for, e.g. '曲沃'.

    Returns:
        The id as a string, e.g. '101100702'.

    Raises:
        IndexError: If the search returns no results.
        ValueError: If the response body is not valid JSON/JSONP
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        requests.RequestException: On network failure or timeout.
    """
    cookies = {
        'Hm_lvt_080dabacb001ad3dc8b9b9049b36d43b': '1665213553',
        'f_city': '%E5%A4%AA%E5%8E%9F%7C101100101%7C',
        'Hm_lpvt_080dabacb001ad3dc8b9b9049b36d43b': '1665214259',
    }
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Referer': 'http://www.weather.com.cn/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
    }
    callback = 'success_jsonpCallback'
    params = {
        'cityname': city,  # e.g. '曲沃'
        'callback': callback,
        '_': '1665214375072',
    }
    response = requests.get(
        'http://toy1.weather.com.cn/search',
        params=params,
        cookies=cookies,
        headers=headers,
        verify=False,
        timeout=10,  # never hang forever on an unresponsive server
    )

    # Unwrap the JSONP envelope ``success_jsonpCallback(...)`` explicitly.
    # str.strip(chars) removes a *set of characters*, not a prefix/suffix, so
    # it only worked as long as the payload began with '[' and ended with ']'.
    payload = response.text.strip().rstrip(';')
    prefix = callback + '('
    if payload.startswith(prefix) and payload.endswith(')'):
        payload = payload[len(prefix):-1]

    areas = json.loads(payload)
    if not areas:
        raise IndexError(f'no search result for city {city!r}')

    # ref looks like '101100702~shanxi~曲沃~Quwo~曲沃~Quwo~357~43400~QW~山西'
    ref = areas[0]['ref']
    city_id = ref.split('~')[0]  # e.g. '101100702'
    return city_id
def get_weather(city_id='101100701'):
    """Fetch the current-conditions record for one city id.

    Requests ``http://d1.weather.com.cn/sk_2d/<city_id>.html``, whose body is
    a JavaScript assignment of the form ``var dataSK=<json>``, and returns the
    decoded JSON value.

    Args:
        city_id: Id string as returned by ``get_city_id``.

    Returns:
        The parsed JSON value (the caller feeds a list of these to
        ``pandas.DataFrame``).

    Raises:
        ValueError: If the body is not valid JSON after unwrapping
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        requests.RequestException: On network failure or timeout.
    """
    cookies = {
        'Hm_lvt_080dabacb001ad3dc8b9b9049b36d43b': '1665213553',
        'f_city': '%E5%A4%AA%E5%8E%9F%7C101100101%7C',
        'Hm_lpvt_080dabacb001ad3dc8b9b9049b36d43b': '1665218563',
    }
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Referer': 'http://www.weather.com.cn/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
    }
    params = {
        '_': '1665219082053',
    }
    response = requests.get(
        f'http://d1.weather.com.cn/sk_2d/{city_id}.html',
        params=params,
        cookies=cookies,
        headers=headers,
        verify=False,
        timeout=10,  # never hang forever on an unresponsive server
    )

    body = response.content.decode('utf8').strip()
    # Drop the ``var dataSK=`` assignment explicitly. str.strip(chars) removes
    # a *set of characters*, not a prefix, so the previous approach only worked
    # because the payload is wrapped in braces.
    if body.startswith('var'):
        body = body.partition('=')[2]
    body = body.strip().rstrip(';')

    return json.loads(body)
if __name__ == '__main__':
    # County names to query, one search request plus one weather request each.
    # NOTE(review): this list has 18 entries although the file header speaks
    # of 17 counties -- confirm whether every name belongs here.
    qx_names = ['安泽', '汾西', '浮山', '古县', '洪洞', '侯马', '吉县', '蒲县', '曲沃', '永和', '襄汾',
                '乡宁', '隰县', '大宁', '尧都', '翼城', '霍州', '夏县']

    # Single-county check before the batch run.
    a = get_weather(city_id=get_city_id('襄汾'))
    print(a)

    # Batch crawl: resolve each name to its id, then fetch its weather record.
    results = [get_weather(city_id=get_city_id(qx_name)) for qx_name in qx_names]

    # One row per county; columns are the keys of the returned records.
    df = pd.DataFrame(results)
    print(df)
    df.to_excel('1.xlsx')
看结果:打开 1.xlsx。
字段名比较好理解:城市名、温度、温度f、风、英文风、n级风、风速、湿度、气压、能见度、空气质量指数、PM2.5指数。