更便捷地爬取每日天气图

Amber_SYH

已于 2022-04-15 17:02:19 修改

阅读量1.1k

点赞数

文章标签： python

于 2022-04-14 20:46:13 首次发布

本文链接：https://blog.csdn.net/Amber_SYH/article/details/124181267

版权

之前写过一个爬天气图的帖子，是用Selenium做的，经各位大佬指出有些笨重，并向我推荐了一些比较简便的方法。先搁置了一段时间，最近又要爬图了，所以重新改了一份简便一点的。并且做了一个自动运行，设定的是每天都运行一遍，也就是每天自动爬图。但是这个需要任务一直运行，可能还有更好的方法实现每天自动爬图的需求，欢迎指出！


# 加载需要的库
import requests
import time 
import re,sys,os
import schedule

def papic(root_dir="D:/code/pachong3/"):
    """Download the NMC upper-air weather-chart images for each pressure level.

    For every level code in the built-in list, fetch the corresponding
    weather-chart page from www.nmc.cn, extract the chart image URLs from the
    page HTML, and save up to the first 12 images into ``<root_dir>/<level>/``.
    Images that already exist on disk are not downloaded again.

    Failures (network errors, HTTP error statuses, disk errors) are reported
    on stdout and skipped so that one bad page or image does not abort the
    whole run.

    Args:
        root_dir: Directory under which one sub-directory per level is
            created. Defaults to the location used by the original script.
    """
    import requests

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36 Edg/100.0.1185.36'
    }
    # Without a timeout a stalled connection would block the job forever.
    timeout = 30
    levels = ['000', '925', '850', '700', '500', '100']

    # Matches attributes such as
    #   img="http://image.nmc.cn/product/2022/04/14/WESA/SEVP_NMC_WESA_SFER_EGH_ACWP_L85_P9_20220414000000000.jpg?v=1649929836149"
    # and captures the URL without the "?v=..." cache-busting suffix.
    pic_pattern = re.compile(
        r'img="(http://image\.nmc\.cn/product/\d+/\d+/\d+/WESA/'
        r'SEVP_NMC_WESA_SFER_EGH_ACWP_L\d+_P9_\d+\.jpg)\?v=\d+"'
    )

    for level in levels:
        level_dir = os.path.join(root_dir, level)
        url = f"http://www.nmc.cn/publish/observations/china/dm/weatherchart-h{level}.htm"

        try:
            page = requests.get(url, headers=headers, timeout=timeout)
            page.raise_for_status()
        except requests.RequestException:
            print('faild: ' + url)
            continue
        page.encoding = 'utf-8'

        pics = pic_pattern.findall(page.text)

        # Only the first 12 images of each page are downloaded.
        for pic in pics[:12]:
            name = pic.split("/")[-1]
            target = os.path.join(level_dir, name)

            # Skip the network round trip when the image is already saved.
            if os.path.exists(target):
                continue

            try:
                # Fetch the image itself (not the HTML page) and make sure
                # the server answered successfully before writing anything.
                image = requests.get(pic, headers=headers, timeout=timeout)
                image.raise_for_status()
                os.makedirs(level_dir, exist_ok=True)
                with open(target, 'wb') as f:
                    f.write(image.content)
            except (requests.RequestException, OSError):
                print('faild: ' + pic)
        
    #######################################################
if __name__ == '__main__':
    # Register the crawl to run once every 24 hours.
    # NOTE(review): with this registration the first run is expected to
    # happen 24 hours after start-up, not immediately - confirm that is
    # the intended behavior.
    schedule.every(24).hours.do(papic)

    while True:
        # Run any job whose scheduled time has arrived.
        schedule.run_pending()
        # Sleep between polls; without it this loop spins at 100% CPU.
        time.sleep(60)

文中关于爬图的代码参考了链接：【爬虫】GRAPES预报图片，所见即所得 (qq.com)