Python爬取更新的天气情况

最新推荐文章于 2024-09-16 14:49:33 发布

GeekX.

最新推荐文章于 2024-09-16 14:49:33 发布

阅读量359

点赞数

分类专栏：爬虫　文章标签：python

本文链接：https://blog.csdn.net/m0_57289085/article/details/122224148

版权

爬虫专栏收录该内容

2 篇文章 0 订阅

订阅专栏

import requests
import time
import os
import re
import sys
# Top-level directory under which all downloaded images are stored.
root_dir = "project"

# Request headers sent with every HTTP call; the User-Agent is a desktop
# Chrome identification string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/96.0.4664.93 Safari/537.36"
    ),
}
def download(url, path):
    """Download ``url`` into the file ``path`` unless ``path`` already exists.

    The parent directory of ``path`` is created when it is missing. Failures
    while fetching or writing are reported on stdout and otherwise ignored,
    so a caller looping over many URLs keeps going.

    Args:
        url: Address of the resource to fetch.
        path: Destination file path; an existing file is never overwritten.
    """
    dirpath = os.path.dirname(path)
    # A bare filename has an empty directory part, and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when one is named.
    if dirpath:
        os.makedirs(dirpath, exist_ok=True)

    if os.path.exists(path):
        return

    # Write to a sibling temporary file first: a half-written file at `path`
    # would make every later call skip the download. A stale ".part" file is
    # harmless because it is simply overwritten on the next attempt.
    part_path = path + ".part"
    try:
        # Without a timeout a stalled connection would block forever.
        info_data = requests.get(url, headers=headers, timeout=30)
        # Do not save an HTTP error page under the image's file name.
        info_data.raise_for_status()
        with open(part_path, "wb") as f:
            f.write(info_data.content)
        os.replace(part_path, path)
    except (requests.RequestException, OSError):
        print("faild:" + url)
# Image links embedded in the forecast page. Compiled once, as a raw string
# so that "\d" is a regex digit class rather than an invalid string escape.
# The captured group is the image URL without its "?v=<digits>" suffix.
_pic_pattern = re.compile(
    r'src="(http://image\.nmc\.cn/product/\d+/\d+/\d+/NWPR/medium/'
    r'SEVP_CNWP_NWPR_SRGRP_EDA_ACHN_L10M_P9_\d+\.png)\?v=\d+"'
)

# Visit the pages numbered 00, 03, ..., 21 (formatted as "0000".."2100").
for init in range(0, 22, 3):
    url = f"http://www.nmc.cn/publish/area/china/10mws_{init:02d}00.html"
    try:
        html = requests.get(url, headers=headers, timeout=30)
        html.raise_for_status()
    except requests.RequestException:
        # One unreachable page should not abort the remaining pages.
        print("faild:" + url)
        continue
    # Force UTF-8 decoding of the page body before reading `.text`.
    html.encoding = "utf-8"
    pics = _pic_pattern.findall(html.text)
    # Only the first four matches of each page are downloaded.
    for pic in pics[:4]:
        print(pic)
        name = pic.split("/")[-1]
        # First 10 characters of the trailing "_"-separated field of the file
        # name -- presumably a YYYYMMDDHH timestamp; confirm against real URLs.
        date = name.split("_")[-1][:10]
        download(pic, os.path.join(root_dir, "10m风", date, name))