python3抓取中国天气网不同城市7天、15天实时数据

最新推荐文章于 2023-11-08 21:53:59 发布

weixin_30823833

最新推荐文章于 2023-11-08 21:53:59 发布

阅读量996

点赞数

文章标签： python 爬虫数据库

原文链接：http://www.cnblogs.com/ZoeLiang/p/11357732.html

版权

思路：
1、根据city.txt文档来获取不同城市code
2、获取中国天气网7d和15d不同城市url
3、利用requests库请求url获取html内容
4、利用BeautifulSoup解析html内容，获取7d和15d指定天气数据
5、将获取的天气数据保存到csv文件中

# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import csv


'''
获取不同城市code
'''


def get_citycode(city_name):
    """Look up the city code for ``city_name`` in ``city.txt``.

    Each line of ``city.txt`` is read as ``code=name``. A line whose name
    equals ``city_name`` exactly wins; if there is none, the first line whose
    name contains ``city_name`` is used. Lines without ``=`` are skipped.

    Args:
        city_name: City name to search for; surrounding whitespace is ignored.

    Returns:
        The code (text before the first ``=``), stripped of whitespace.

    Raises:
        ValueError: If ``city_name`` is blank or no line matches.
    """
    wanted = city_name.strip()
    if not wanted:
        # An empty string is a substring of every line and would silently
        # select the first city in the file.
        raise ValueError('invalid city name')

    partial_code = None
    # utf-8-sig also reads plain UTF-8, but drops a leading BOM so it cannot
    # end up glued to the first code in the file.
    with open('city.txt', 'r', encoding='utf-8-sig') as fs:
        for line in fs:  # stream the file; no need to hold every line
            code, sep, name = line.partition('=')
            if not sep:
                continue
            code = code.strip()
            name = name.strip()
            if name == wanted:
                return code
            # Match against the name only, so digits in the code or a longer
            # city name earlier in the file cannot shadow an exact match.
            if partial_code is None and wanted in name:
                partial_code = code

    if partial_code is not None:
        return partial_code
    raise ValueError('invalid city name')





'''
获取不同城市7天url
'''


def get_7d_url(city_name):
    """Return the 7-day forecast page URL for ``city_name``.

    The city code comes from ``get_citycode``; any error it raises
    propagates to the caller.
    """
    city_code = get_citycode(city_name)
    pieces = ('http://www.weather.com.cn/weather/', city_code, '.shtml')
    return ''.join(pieces)


'''
获取不同城市15天url
'''


def get_15d_url(city_name):
    """Return the 15-day forecast page URL for ``city_name``.

    The city code comes from ``get_citycode``; any error it raises
    propagates to the caller.
    """
    city_code = get_citycode(city_name)
    pieces = ('http://www.weather.com.cn/weather15d/', city_code, '.shtml')
    return ''.join(pieces)


'''
获取html内容
'''


def get_content(url, data=None):
    """Fetch ``url`` with a GET request and return the response body as text.

    Args:
        url: Page address to request.
        data: Accepted but not used by this function.

    Returns:
        The response text, decoded as UTF-8.
    """
    response = requests.get(url, timeout=60)
    # Force UTF-8 instead of whatever encoding requests would pick.
    response.encoding = 'utf-8'
    html = response.text
    return html


'''
获取7天指定数据
'''


def get_7d_data(htmltext, city):
    """Extract one row per ``<li>`` from the ``div#7d`` block of ``htmltext``.

    Args:
        htmltext: HTML of the 7-day forecast page.
        city: Value placed in the first column of every row.

    Returns:
        A list of rows ``[city, date, text, temperature_H, temperature_L,
        wind_force]``. ``temperature_H`` is ``None`` when the temperature
        paragraph has no ``<span>``.
    """
    soup = BeautifulSoup(htmltext, "html.parser")
    forecast = soup.body.find('div', {'id': '7d'})
    day_items = forecast.find('ul').find_all('li')

    rows = []
    for item in day_items:
        date = item.find('h1').string
        paragraphs = item.find_all('p')
        weather = paragraphs[0].string

        # Second <p> holds the temperatures: <span> = high, <i> = low.
        temp_p = paragraphs[1]
        high_tag = temp_p.find('span')
        high = None if high_tag is None else high_tag.string
        low = temp_p.find('i').string

        # Third <p> holds the wind; its <i> is the force.
        wind = paragraphs[2].find('i').string

        rows.append([city, date, weather, high, low, wind])
    return rows



'''
获取15天指定数据
'''


def get_15d_data(htmltext, city):
    """Extract one row per ``<li>`` from the ``div#15d`` block of ``htmltext``.

    Args:
        htmltext: HTML of the 15-day forecast page.
        city: Value placed in the first column of every row.

    Returns:
        A list of rows ``[city, date, text, temperature_H, temperature_L,
        wind_direction, wind_force]``. ``temperature_H`` is ``None`` when the
        temperature span has no ``<em>``; ``temperature_L`` is ``None`` when
        the span carries no text of its own.
    """
    soup = BeautifulSoup(htmltext, "html.parser")
    forecast = soup.body.find('div', {'id': '15d'})

    rows = []
    for item in forecast.find('ul').find_all('li'):
        spans = item.find_all('span')
        date = spans[0].string
        weather = spans[1].string

        # Third span holds the temperatures; the high is wrapped in <em>.
        temp_span = spans[2]
        high_tag = temp_span.find('em')
        high = None if high_tag is None else high_tag.string

        low = temp_span.string
        if low is None:
            # Tag.string is None when the tag has more than one child, e.g.
            # <span><em>high</em>/low</span>, which would drop the low
            # temperature. Recover it from the span's direct text nodes
            # (bs4 text nodes are str subclasses; child tags are not).
            own_text = ''.join(
                child for child in temp_span.children if isinstance(child, str)
            )
            # Drop the "/" that separates the high from the low.
            low = own_text.strip().lstrip('/').strip() or None

        wind_direction = spans[3].string
        wind_force = spans[4].string

        rows.append([city, date, weather, high, low, wind_direction, wind_force])
    return rows




'''
保存获取到的天气数据
csv文件
'''


def save_data(data, filename):
    """Append the rows in ``data`` to the CSV file ``filename``.

    Args:
        data: Iterable of rows, each an iterable of cell values.
        filename: Path of the CSV file; opened in append mode.
    """
    # newline='' leaves line endings to the csv module, which avoids blank
    # lines between rows; errors='ignore' drops characters the file encoding
    # cannot represent.
    out = open(filename, 'a', errors='ignore', newline='')
    with out:
        csv.writer(out).writerows(data)







'''
爬取7天天气数据
'''
def _7d(city, filename=r'E:\weather.csv'):
    """Fetch the 7-day forecast for ``city`` and append it to a CSV file.

    Args:
        city: City name, resolved to a city code when building the URL.
        filename: CSV file to append to. The default is a raw string because
            ``\\w`` is not a valid escape sequence and triggers a warning in
            a normal string literal; the path value itself is unchanged.
    """
    url = get_7d_url(city)
    html = get_content(url)
    result = get_7d_data(html, city)
    save_data(result, filename)




'''
爬取15天天气数据
'''
def _15d(city, filename=r'E:\weather.csv'):
    """Fetch the 15-day forecast for ``city`` and append it to a CSV file.

    Args:
        city: City name, resolved to a city code when building the URL.
        filename: CSV file to append to. The default is a raw string because
            ``\\w`` is not a valid escape sequence and triggers a warning in
            a normal string literal; the path value itself is unchanged.
    """
    url = get_15d_url(city)
    html = get_content(url)
    result = get_15d_data(html, city)
    save_data(result, filename)




if __name__ == '__main__':
    # Read city names separated by whitespace. split() with no argument
    # collapses repeated spaces and ignores leading/trailing ones, so no
    # empty city name is ever passed on to the lookup.
    cities = input('city name: ').split()
    for city in cities:
        _7d(city)
        _15d(city)

附：city.txt 获取地址：https://pan.baidu.com/s/1VNW8AJi6_zo7mP_90lTkiA 提取码：red5

转载于:https://www.cnblogs.com/ZoeLiang/p/11357732.html