[Python]网络爬虫 爬取天气数据、城市、日温度、风向、风力、天气

结果:

代码如下:

import requests
from bs4 import BeautifulSoup
import IO as ios

class item:
    """Column-oriented holder for scraped weather records.

    Each attribute is an independent list; the value at index ``i`` in every
    list is meant to describe the same day.
    """

    def __init__(self):
        self.date = []            # date
        self.max_temp = []        # daily high temperature
        self.min_temp = []        # daily low temperature
        self.weather = []         # weather description
        self.wind_direction = []  # wind direction
        self.wind_force = []      # wind force


Data_Box = item()  # module-level data box; get_datas() appends the scraped values to its lists


# Generate the monthly history-page URLs for one city.
def get_url(city, start_year=2015, end_year=2021):
    '''
    Yield one entry per month for every year in [start_year, end_year).

    city: the city identifier placed in the URL path (the spelled-out city
        name used by lishi.tianqi.com).
    start_year: first year to generate (inclusive); defaults to 2015.
    end_year: year at which to stop (exclusive); defaults to 2021.

    Each yielded item is the string "<url> <city>": the page URL and the
    city identifier separated by a single space, so callers can recover both
    with split(" ").
    '''
    for year in range(start_year, end_year):
        for month in range(1, 13):
            # Pages are addressed as <year><two-digit month>, e.g. 201501.
            url = "http://lishi.tianqi.com/{}/{}{:02d}.html".format(city, year, month)
            yield url + " " + city


# Parse one monthly history page into per-day weather tuples.
def _parse_weather_rows(page_text):
    """Return a list of (date, max_temp, min_temp, weather, wind_direction,
    wind_force) string tuples extracted from the page's first ``ul.thrui``.

    Raises IndexError when the expected list, columns, or the space-separated
    wind field are missing, so a malformed page yields no partial rows.
    """
    soup = BeautifulSoup(page_text, 'html.parser')
    day_list = soup.find_all("ul", class_='thrui')[0]
    rows = []
    # The last <li> is not weather data, so it is dropped.
    for li in day_list.find_all("li")[:-1]:
        div = li.find_all("div")
        date_parts = div[0].text.split("-")
        wind = div[4].text.split(" ")
        rows.append((
            date_parts[0] + date_parts[1] + date_parts[2],
            div[1].text,
            div[2].text,
            div[3].text,
            wind[0],
            wind[1],
        ))
    return rows


# Fetch the weather data
def get_datas():
    """Crawl the history pages of every city listed in ``CityEn.txt``.

    Each line of ``CityEn.txt`` is split on single spaces: the first field is
    used as the city label in the output and the second field is passed to
    ``get_url`` as the URL city identifier. Lines with fewer than two fields
    are skipped.

    For every city, each successfully parsed day is appended to the
    module-level ``Data_Box`` lists, printed, and handed to
    ``ios.cw("weather.txt", ...)`` as one space-separated line
    (label, city, date, min temp, max temp, weather, wind direction,
    wind force). NOTE(review): ``ios.cw`` is defined in the project's ``IO``
    module; presumably it appends the text to the named file - confirm.

    Pages that cannot be fetched or parsed are reported and skipped.

    Returns:
        The status string "数据获取完毕" once all cities have been processed.
    """
    # Request settings are identical for every page, so build them once.
    cookie = {
        "cityPy": "UM_distinctid=171f2280ef23fb-02a4939f3c1bd4-335e4e71-144000-171f2280ef3dab; Hm_lvt_ab6a683aa97a52202eab5b3a9042a8d2=1588905651; CNZZDATA1275796416=871124600-1588903268-%7C1588990372; Hm_lpvt_ab6a683aa97a52202eab5b3a9042a8d2=1588994046"}
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3756.400 QQBrowser/10.5.4039.400"}

    with open("CityEn.txt", encoding='utf-8') as city_file:
        for line in city_file:
            fields = line.split(" ")
            if len(fields) < 2:
                # Blank or malformed line: nothing to crawl.
                continue
            city_label = str(fields[0])
            city = str(fields[1]).strip("\n")
            print(city)

            # Rows are collected per city so that one city's data is never
            # written again under the name of a later city.
            city_rows = []
            for url in get_url(city):
                try:
                    html = requests.get(url=url.split(" ")[0], headers=header,
                                        cookies=cookie, timeout=30)
                    # Parse the whole page before storing anything, so a
                    # failure midway cannot leave half-filled rows behind.
                    page_rows = _parse_weather_rows(html.text)
                except Exception:
                    # Best-effort crawl: skip the page, but unlike a bare
                    # except this still lets Ctrl-C / SystemExit through.
                    print("该页面爬取失败!")
                else:
                    print("该页面爬取成功!")
                    city_rows.extend(page_rows)

            for date, max_temp, min_temp, weather, wind_direction, wind_force in city_rows:
                # Keep the shared data box populated for any other consumer.
                Data_Box.date.append(date)
                Data_Box.max_temp.append(max_temp)
                Data_Box.min_temp.append(min_temp)
                Data_Box.weather.append(weather)
                Data_Box.wind_direction.append(wind_direction)
                Data_Box.wind_force.append(wind_force)

                print(city_label, city, date, min_temp, max_temp, weather,
                      wind_direction, wind_force)
                ios.cw("weather.txt", " ".join((
                    city_label, city, date, min_temp, max_temp, weather,
                    wind_direction, wind_force)) + "\n")

    return "数据获取完毕"


# Crawler entry point: run the full crawl when this file is executed as a script.
if __name__ == "__main__":
    get_datas()
  • 3
    点赞
  • 35
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值