未来15天天气预测数据

个人学习分享:下面的脚本使用 requests 和 BeautifulSoup 从 2345 天气网抓取各地区未来15天的天气预报,并将结果保存为 CSV 文件。如有可以改进的地方,欢迎指出,共同学习。

import requests
import pandas as pd
from bs4 import BeautifulSoup
import time
from datetime import datetime
import os
import re
import pypinyin
from pypinyin import lazy_pinyin,  load_phrases_dict

# 安装:pip install pypinyin

#结果列表
results = list()
unget = list()

#自定义词典
personalized_dict = {
        '五指山': [['tong'], ['shi']],
        '张家口': [['zhang']],
        '漯河': [['luo'], ['he']],
        '朝阳': [['chao'], ['yang']],
        '锡林郭勒': [['xi'], ['lin'], ['guo'], ['le']],
        '长治': [['chang'], ['zhi']],
        '昌都': [['chang'], ['du']],
        '巴音郭楞': [['ba'], ['zhou']],
        '阿勒泰': [['a'], ['le'], ['tai']]
    }

def get_week_day(date):
    """Return the Chinese weekday name for a weekday number.

    Args:
        date: Weekday number where 1 is Monday and 7 is Sunday (the
            numbering produced by ``date.isoweekday()``).

    Returns:
        The matching name, from '星期一' (1) through '星期日' (7).

    Raises:
        KeyError: If ``date`` is not one of 1..7.
    """
    names = ('星期一', '星期二', '星期三', '星期四', '星期五', '星期六', '星期日')
    # Number the names starting at 1 so the mapping keys are 1..7.
    by_number = dict(enumerate(names, start=1))
    return by_number[date]

def _resolve_forecast_date(month, day, today):
    """Return the calendar date for a month/day label close to *today*.

    The page only provides month and day, so the year has to be inferred.
    A label that lands more than about half a year away from ``today`` when
    paired with the current year belongs to the neighbouring year (for
    example a January label seen while today is in December).
    """
    candidate = datetime(year=today.year, month=month, day=day).date()
    offset_days = (candidate - today).days
    if offset_days < -180:
        candidate = candidate.replace(year=today.year + 1)
    elif offset_days > 180:
        candidate = candidate.replace(year=today.year - 1)
    return candidate


def getweather():
    """Scrape the 15-day forecast for every city in the code file.

    Reads the city list from "2020编码.csv" in the current working directory
    via ``read_csv``, fetches ``http://tianqi.2345.com/<spell>/<code>.htm``
    for each city and parses it with requests + BeautifulSoup. Each forecast
    row is ``[city name, city code, date, weekday, high temperature,
    low temperature, *remaining page fields]``.

    Rows are added to the module-level ``results`` list only when the whole
    city parsed successfully; failed cities are recorded in ``unget`` as
    "name-code-spell". When all cities are done, ``results`` is written with
    ``write_csv`` and ``unget`` is printed.
    """
    csv_path = os.getcwd() + os.sep + "2020编码.csv"
    num, city_names, city_codes, spells = read_csv(csv_path)

    # Loop invariants: the pattern that captures every "[...]" array following
    # a "data" key in the page script, and today's date for year inference.
    pattern = re.compile(r'.*?data.*?\[(.*?)\].*?', re.S)
    today = datetime.now().date()

    # One session for the whole run; the context manager closes it even when
    # a request or the parsing raises.
    with requests.Session() as session:
        for i in range(num):
            city_spell = spells[i]
            # read_csv yields one-element lists; pop() unwraps the value.
            code = city_codes[i].pop()
            city_name = city_names[i].pop()
            url = "http://tianqi.2345.com/%s/%d.htm" % (city_spell, code)
            try:
                # Timeout so a single unresponsive page cannot stall the run.
                res = session.get(url, timeout=10)
                res.raise_for_status()
                bs = BeautifulSoup(res.text, "html.parser")

                # The temperatures are read from the second-to-last <script>:
                # the second and third captured arrays are highs and lows.
                script_content = bs.select('script')[-2].string
                temps = pattern.findall(script_content)
                high_temps = temps[1].strip(',').replace('"', '').split(',')
                low_temps = temps[2].strip(',').replace('"', '').split(',')

                ul = bs.find_all("ul", {"id": "js_hours24"})
                city_rows = []
                for index, li in enumerate(ul[0].find_all("li")):
                    fields = li.text.strip().splitlines()
                    fields.pop()    # drop the last line
                    fields.pop(2)   # drop the third line
                    fields.pop(0)   # drop the first line (day label, unused)
                    date_time = fields.pop(0)  # "month/day"
                    parts = date_time.split('/')
                    forecast_date = _resolve_forecast_date(
                        int(parts[0]), int(parts[1]), today)
                    week = get_week_day(forecast_date.isoweekday())
                    city_rows.append([
                        city_name,
                        code,
                        forecast_date.strftime("%Y-%m-%d"),
                        week,
                        high_temps[index],
                        low_temps[index],
                        *fields,
                    ])
            except Exception as e:
                # Best-effort scrape: report the failure, remember the city
                # and carry on with the next one.
                print(e)
                print("获取%s15天预测数据出现异常" % city_name)
                error_info = city_name + "-" + str(code) + "-" + city_spell
                unget.append(error_info)
                continue
            # Publish the rows only after the whole city parsed, so a city
            # listed in unget never leaves partial rows in results.
            results.extend(city_rows)
            print("成功获取%s15天预测数据" % city_name)
    write_csv(results)
    print(unget)



def spell(city_name):
    """Return the pinyin of ``city_name`` joined into a single string.

    The custom readings in ``personalized_dict`` are registered with pypinyin
    before the conversion, and the resulting syllables are concatenated with
    no separator.
    """
    load_phrases_dict(personalized_dict)
    syllables = lazy_pinyin(city_name)
    return ''.join(syllables)

def read_csv(filePath):
    """Load the city information from the region-code CSV file.

    The file is read as GBK. The first column supplies the city names and the
    second column the city codes; both come back as lists of one-element
    lists (one inner list per CSV row).

    Args:
        filePath: Path of the CSV file to read.

    Returns:
        A tuple ``(num, city_names, city_codes, spells)`` where ``num`` is
        the number of rows and ``spells`` holds the pinyin string produced by
        ``spell`` for each row of ``city_names``.
    """
    frame = pd.read_csv(filePath, encoding='gbk')
    city_names = frame.iloc[:, 0:1].values.tolist()
    city_codes = frame.iloc[:, 1:2].values.tolist()
    # Each name row (a one-element list) is handed to spell() unchanged.
    spells = [spell(name_row) for name_row in city_names]
    return len(city_codes), city_names, city_codes, spells

def write_csv(results):
    """Write the forecast rows to "weather.csv" in the current directory.

    Args:
        results: Iterable of rows, each holding nine values in the order
            city name, city code, date, weekday, high temperature, low
            temperature, weather, wind direction, wind force.
    """
    header = ['地区名称', '地区编码', '日期', '星期', '最高气温', '最低气温', '天气', '风向', '风力']
    target = os.sep.join([os.getcwd(), "weather.csv"])
    frame = pd.DataFrame(results, columns=header)
    # index=False keeps the DataFrame's row-number column out of the file.
    frame.to_csv(target, index=False)


# Script entry point: run the scraper only when the file is executed directly.
if __name__ == '__main__':
    # getweather() has no return statement, so weatherlist is None; the
    # scraped rows are written to a CSV file by getweather() itself.
    weatherlist = getweather()
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值