import pandas as pd
import requests
from bs4 import BeautifulSoup
import json
def request_lists(city, province='江苏省'):
    """Fetch the 15-day forecast records for *city* from Baidu's weather page.

    Parameters
    ----------
    city : str
        City name, interpolated into the search query and sent as
        ``city_name``.
    province : str, optional
        Province sent as ``province_name``. Defaults to '江苏省' to keep
        the original hard-coded behaviour; pass the city's real province
        for cities outside Jiangsu.

    Returns
    -------
    list[dict]
        The ``15_day_forecast.info`` records embedded in the page.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    """
    url = 'http://weathernew.pae.baidu.com/weathernew/pc'
    headers = {
        'cookie': 'BIDUPSID=EFE54456868C2B3E16D49CF292D5AB83; PSTM=1614996862; BAIDUID=EFE54456868C2B3E8ED683FEA8D20814:FG=1; BDUSS=3ltU0ZoUjJEWTZWSEptekNrMDZTbks4RzY4dkg0dFQ3cXlDRC1EbXN5dDNhVzFnSVFBQUFBJCQAAAAAAAAAAAEAAAApowKIzsTN5sSp2akAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAHfcRWB33EVgU; BDSFRCVID=bLtOJexroG38sPQeieRiqhTDs2KK0gOTDYLtOwXPsp3LGJLVgeasEG0PtENUNzF-oxnIogKKL2OTHmuF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tJuOVCKhfCI3fP36q4jo2bt_-q-X5-RLfbT0Lp7F5l8-hl3wXjJi3-QWjUc8aP3wbm-HahvIMxoxOKQphn6G3U_P2a5hKlJKWCALblrN3KJmSxK9bT3v5tDTXfbm2-biW2tH2Mbda45P_IoG2Mn8M4bb3qOpBtQmJeTxoUJ25DnJhhCGe6LBe5OBjGAjqbbfb-oD3b88Kb7Vbpcm5MnkbJkXhPteLjOm2DIf_bOs2nbFfqP4yURJjPI7QbrH0xc3JTKtMb0K5nrvSlr63xcpQT8r5a7bBhOlLIrRalrsab3vOp44XpO1hJLzBN5thURB2DkO-4bCWJ5TMl5jDh3Mb6ksD-Ftqj_ffR4q_IPQKt8_HRjYbb__-P4DeUQI-xRZ56bHWh0bfpTGVR5YQhJYKtk7Qh-HBMPj52OnKUT1bp7boMJRK5bdQUIT3xJKKnJ43bRTLp7G0tnPV56v2hbshP-UyPkHWh37a6TlMKoaMp78jR093JO4y4Ldj4oxJpOJ5JbMopCafDDbbD_xjTu3MRJH5Mob-C62aJ38-nvvWJ5WqR7jDT7505t8MfoNaRo3Qgneoqvctn3cShbXXMovMPuJbnofK4olXacM0Rjb3l02V-bHXloF2x5Dhp7XB4RMW23roq7mWn6hsxA45J7cM4IseboJLfT-0bc4KKJxbnLWeIJEjjCajTcXDat8q-jeHDrKBRbaHJOoDDvPXbOcy4LbKxnxJ5veLN6-BR5424jsShbRDxRvD--g3-OkWn39babTQ-tbBp3k8MQTbtQ5QfbQ0hO4WqQwMeFLW-n8WR7JOpvsDxnxy-u0QRPH-Rv92DQMVU52QqcqEIQHQT3m5-5bbN3ut6T2-DA__CtaJI5P; H_PS_PSSID=33838_34004_33607_26350_34023; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; Hm_lvt_3535ee208b02ecdf4b2576fd444e8473=1620634568,1620729027,1620785597,1622255359; Hm_lpvt_3535ee208b02ecdf4b2576fd444e8473=1622255359',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4469.4 Safari/537.36',
        'Referer': 'http://weathernew.pae.baidu.com/',
    }
    params = {
        'query': '{}天气'.format(city),
        'srcid': 4982,
        'city_name': city,
        'province_name': province,
    }
    # timeout so a hung server cannot block the scraper forever
    response = requests.get(url, headers=headers, params=params, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, features="lxml")
    # The first <script> tag contains an assignment like
    # "window.tplData = {...};" — drop the 17-char prefix and the trailing
    # semicolon, then parse the remaining JSON object.
    payload = soup.find_all("script")[0].decode_contents()[17:-1]
    data = json.loads(payload)
    return data['15_day_forecast']['info']
def save(lists, all_dict, local):
    """Fold each forecast record in *lists* into the accumulator *all_dict*.

    For every record, the selected English fields are renamed to Chinese
    column names and appended (", "-joined) onto the existing string held
    under that column in *all_dict*.

    Parameters
    ----------
    lists : list[dict]
        Forecast records as returned by ``request_lists``.
    all_dict : dict
        Accumulator mapping Chinese column name -> ", "-joined values.
    local : str
        Region name recorded under the '地区' column for every record.

    Returns
    -------
    dict
        The updated accumulator; only keys present in both the
        accumulator and the translated record are kept.
    """
    # English source field -> Chinese output column
    subkey = {'date': '日期', 'weather_day': '天气', 'wind_power_day': '风级',
              'temperature_day': '白天温度', 'temperature_night': '夜晚温度'}
    for record in lists:  # renamed from `dict`, which shadowed the builtin
        # Pick out the wanted fields, re-keyed to Chinese column names.
        subdict = {cn_key: record[en_key] for en_key, cn_key in subkey.items()}
        subdict["地区"] = local
        all_dict = {key: ", ".join([all_dict[key], subdict[key]])
                    for key in all_dict if key in subdict}
    return all_dict
def get_weather_15():
    """Scrape the 15-day forecast for every city listed in
    six_location.xlsx (first column) and write the combined table to
    baidudata_15.xlsx.

    Side effects: reads six_location.xlsx, performs one HTTP request per
    city, prints progress, and writes baidudata_15.xlsx.
    """
    df = pd.read_excel("six_location.xlsx")
    # Seed row so the ", ".join inside save() always has a left operand;
    # it is dropped before the table is written out.
    all_dict = {'日期': '2021-12-16', '天气': '多云', '风级': '<3级',
                '白天温度': '11', '夜晚温度': '-5', '地区': '邯郸'}
    for i in range(df.shape[0]):
        city = str(df.iloc[i, 0])
        print("地区", city)
        all_dict = save(request_lists(city), all_dict, city)
    # save() joins with ", " but we split on ',', which leaves a leading
    # space on every appended value — strip it so clean strings reach the
    # output file.  Insertion order of all_dict fixes the column order.
    columns = {name: [value.strip() for value in joined.split(',')]
               for name, joined in all_dict.items()}
    all_data = pd.DataFrame(columns)
    all_data.drop(0, inplace=True, axis=0)  # discard the seed row
    all_data['日期'] = all_data['日期'].map(str)
    all_data['白天温度'] = all_data['白天温度'].map(int)
    all_data['夜晚温度'] = all_data['夜晚温度'].map(int)
    # NOTE: `encoding=` was deprecated in pandas 1.2 and removed in 2.0;
    # to_excel handles Unicode itself, so the kwarg is dropped.
    all_data.to_excel('baidudata_15.xlsx', index=False)
if __name__ == '__main__':
    # Run the scraper only when executed as a script, not on import.
    get_weather_15()
# python爬取未来15天天气情况 (Python: scrape the next 15 days' weather)
# 最新推荐文章于 2024-04-22 10:53:28 发布 (article publish-date residue, kept as comment)