Python爬虫实例之查询任意城市七天内的天气情况

最近在学习Python,学完基础教程后,在网上看了下如何进阶,大家推荐的基本都是从爬虫开始,容易上手还有成就感,不至于那么容易中途放弃。所以我开始学习了爬虫,零基础学习经历:
1、网上搜了一些爬虫的教学视频,基本都是有些python基础的就可以做的。
2、跟着视频学习,做一些基础练习。我学习的基础内容包括:请求库 requests、urllib,还有 scrapy 框架(小爬虫基本用不上);数据清洗方面学了 lxml、bs4 以及正则表达式。
3、试着做一些实例,找一些简单的网页爬取一些信息,做着做着发现,数据获取变得简单,数据清洗才是工作量。
下面分享我做的爬取天气预报,觉得这是我目前能做的最好的了。

import requests
from bs4 import BeautifulSoup
from lxml import etree
import re

# Request helper
def request_url(url, timeout=10):
    """Send a GET request to *url* with a desktop-browser User-Agent header.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The response object returned by ``requests.get``. The HTTP status
        code is not checked here; callers read ``.content`` directly.

    Raises:
        requests.exceptions.RequestException: If the connection fails or the
            timeout expires.
    """
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
    }
    response = requests.get(url, headers=header, timeout=timeout)
    return response
"""
def file_load(words):
    with open ("file2.html",'w',encoding=("UTF-8")) as f:
        f.write(words)
"""
# Original note (it describes city_nameinfo() below): get the names and links
# of the provinces and municipalities, and return a dict mapping each city
# name to its URL.
#
# cityhtml is a sample of the markup involved: each province/municipality is a
# <div class="nav-province"> followed by a <ul> whose <li><a href="..."> items
# are the cities. No code visible in this file reads this variable; it appears
# to be kept only as an illustration of the page structure.
cityhtml="""<div class="nav-province"><span class="expand"></span>北京市</div>
<ul>
<li><a href="/publish/forecast/ABJ/beijing.html" stationid="54511">北京</a></li>
<li><a href="/publish/forecast/ABJ/chaoyang.html" stationid="54433">朝阳</a></li>
<li><a href="/publish/forecast/ABJ/fengtai.html" stationid="54514">丰台</a></li>
<li><a href="/publish/forecast/ABJ/haidian.html" stationid="54399">海淀</a></li>
<li><a href="/publish/forecast/ABJ/shijingshan.html" stationid="54513">石景山</a></li>
<li><a href="/publish/forecast/ABJ/shunyi.html" stationid="54398">顺义</a></li>
<li><a href="/publish/forecast/ABJ/changping.html" stationid="54499">昌平</a></li>
<li><a href="/publish/forecast/ABJ/daxing.html" stationid="54594">大兴</a></li>
<li><a href="/publish/forecast/ABJ/fangshan.html" stationid="54596">房山</a></li>
<li><a href="/publish/forecast/ABJ/huairou.html" stationid="54419">怀柔</a></li>
<li><a href="/publish/forecast/ABJ/mentougou.html" stationid="54505">门头沟</a></li>
<li><a href="/publish/forecast/ABJ/miyun.html" stationid="54416">密云</a></li>
<li><a href="/publish/forecast/ABJ/pinggu.html" stationid="54424">平谷</a></li>
<li><a href="/publish/forecast/ABJ/tongzhou1.html" stationid="54431">通州</a></li>
<li><a href="/publish/forecast/ABJ/yanqing.html" stationid="54406">延庆</a></li>
</ul>
<div class="nav-province"><span class="collapsed"></span>天津市</div>
<ul style="display:none;">
<li><a href="/publish/forecast/ATJ/tianjin.html" stationid="54517">天津</a></li>
<li><a href="/publish/forecast/ATJ/xiqing.html" stationid="54527">西青</a></li>
<li><a href="/publish/forecast/ATJ/baodi.html" stationid="54525">宝坻</a></li>
<li><a href="/publish/forecast/ATJ/beichen.html" stationid="54528">北辰</a></li>
<li><a href="/publish/forecast/ATJ/binhaixinqu.html" stationid="54623">滨海新区</a></li>
<li><a href="/publish/forecast/ATJ/dongli.html" stationid="54526">东丽</a></li>
<li><a href="/publish/forecast/ATJ/jixian2.html" stationid="54428">蓟县</a></li>
<li><a href="/publish/forecast/ATJ/jinnan.html" stationid="54622">津南</a></li>
<li><a href="/publish/forecast/ATJ/jinghai.html" stationid="54619">静海</a></li>
<li><a href="/publish/forecast/ATJ/ninghe.html" stationid="54529">宁河</a></li>
<li><a href="/publish/forecast/ATJ/wuqing.html" stationid="54523">武清</a></li>
<li><a href="/publish/forecast/ATJ/dagang.html" stationid="54645">大港</a></li>
<li><a href="/publish/forecast/ATJ/hangu.html" stationid="54530">汉沽</a></li>
</ul>"""

def city_nameinfo():
    """Download the city-selection page and map each city name to its link.

    Every ``<li>`` found inside a ``<ul>`` on the page contributes one entry:
    the key is the item's text and the value is the ``href`` of its first
    ``<a>`` element, exactly as written in the page. If the same name occurs
    more than once, the last occurrence wins.

    The earlier version also collected the ``nav-province`` headers and built
    a per-province structure that was never returned. It indexed the headers
    by ``<ul>`` position, which raises ``IndexError`` as soon as the page
    contains more ``<ul>`` elements than province headers. That unused
    bookkeeping has been removed.

    Returns:
        dict: city name -> href string.
    """
    url = "http://m.nmc.cn/f/forecast/selectCity"
    # Build the bs4 document from the raw response bytes
    html = BeautifulSoup(request_url(url).content, "lxml")

    cityinfo = {}
    for city_list in html.find_all("ul"):
        for item in city_list.find_all("li"):
            link = item.a
            # Skip list items that carry no usable link instead of crashing
            if link is None or not link.has_attr("href"):
                continue
            cityinfo[item.text] = link["href"]
    return cityinfo

# Original note: get a city's weather information.
#
# weatherhtml is a sample of the forecast markup: a <ul class="weather_datemate">
# with one <li> per day, each holding four <span> elements in the order
# date, weather, wind, temperature. No code visible in this file reads this
# variable; it appears to be kept only as an illustration of what
# weatherinfo() parses.
weatherhtml="""
<ul class="weather_datemate">
<li> <p class="weather_w_20"> <strong> 今天 </strong> <span>12.29</span> </p> <p class="weather_w_25"> <span class="same"> - / 多云 </span> </p> <p class="weather_w_35" style="width: 25%;"> <span class="same"> -   - / 无持续风向   微风 </span> </p> <p class="weather_w_20" style="text-align: right; width: 30%;"> <span class="same"> - / -6℃ </span> </p> </li>
<li> <p class="weather_w_20"> <strong> 明天 </strong> <span>12.30</span> </p> <p class="weather_w_25"> <span class="same"> 多云 </span> </p> <p class="weather_w_35" style="width: 25%;"> <span class="same"> 无持续风向   3~4级 / 无持续风向   微风 </span> </p> <p class="weather_w_20" style="text-align: right; width: 30%;"> <span class="same"> 0℃ / -9℃ </span> </p> </li>
<li> <p class="weather_w_20"> <strong> 后天 </strong> <span>12.31</span> </p> <p class="weather_w_25"> <span class="same"> 多云 </span> </p> <p class="weather_w_35" style="width: 25%;"> <span class="same"> 无持续风向   微风 / 北风   微风 </span> </p> <p class="weather_w_20" style="text-align: right; width: 30%;"> <span class="same"> -1℃ / -7℃ </span> </p> </li>
<li> <p class="weather_w_20"> <strong> 星期三 </strong> <span>01.01</span> </p> <p class="weather_w_25"> <span class="same"> 晴 </span> </p> <p class="weather_w_35" style="width: 25%;"> <span class="same"> 北风   微风 </span> </p> <p class="weather_w_20" style="text-align: right; width: 30%;"> <span class="same"> 3℃ / -5℃ </span> </p> </li>
<li> <p class="weather_w_20"> <strong> 星期四 </strong> <span>01.02</span> </p> <p class="weather_w_25"> <span class="same"> 晴 </span> </p> <p class="weather_w_35" style="width: 25%;"> <span class="same"> 南风   微风 / 北风   微风 </span> </p> <p class="weather_w_20" style="text-align: right; width: 30%;"> <span class="same"> 4℃ / -5℃ </span> </p> </li>
<li> <p class="weather_w_20"> <strong> 星期五 </strong> <span>01.03</span> </p> <p class="weather_w_25"> <span class="same"> 晴 </span> </p> <p class="weather_w_35" style="width: 25%;"> <span class="same"> 南风   微风 / 北风   微风 </span> </p> <p class="weather_w_20" style="text-align: right; width: 30%;"> <span class="same"> 6℃ / -3℃ </span> </p> </li>
<li> <p class="weather_w_20"> <strong> 星期六 </strong> <span>01.04</span> </p> <p class="weather_w_25"> <span class="same"> 多云 </span> </p> <p class="weather_w_35" style="width: 25%;"> <span class="same"> 南风   微风 / 北风   微风 </span> </p> <p class="weather_w_20" style="text-align: right; width: 30%;"> <span class="same"> 3℃ / -4℃ </span> </p> </li>
</ul>"""

# Fetch a city's forecast for the coming days; returns a dict of per-day dicts
def weatherinfo(cityurl, cityname):
    """Download a city's forecast page and collect the daily entries.

    The page is expected to contain ``<ul class="weather_datemate">`` with one
    ``<li>`` per day. Within each ``<li>`` the first four ``<span>`` elements
    are read, in order, as date, weather, wind and temperature. Any further
    spans are ignored, and a day with fewer than four spans yields only the
    fields that are present.

    Args:
        cityurl: Full URL of the city's forecast page.
        cityname: City name, used as the prefix of every key in the result.

    Returns:
        dict: ``cityname + date`` -> ``{'date', 'weather', 'wind',
        'temperature'}``. Values are the raw span text and are not stripped.

    Raises:
        ValueError: If the page has no ``weather_datemate`` list. Previously
            this surfaced as an ``AttributeError`` on ``None``.
    """
    html = BeautifulSoup(request_url(cityurl).content, "lxml")
    forecast_list = html.find("ul", class_="weather_datemate")
    if forecast_list is None:
        raise ValueError("no weather_datemate list found at " + cityurl)

    # Span order inside each <li>
    fields = ("date", "weather", "wind", "temperature")

    cityweather = {}
    for day in forecast_list.find_all("li"):
        spans = day.find_all("span")
        # An <li> without spans has no date to key on, so skip it
        if not spans:
            continue
        # A fresh dict per day, so all days can live directly in one result
        # dict without the intermediate update()/clear() step
        day_info = {field: span.text for field, span in zip(fields, spans)}
        cityweather[cityname + day_info["date"]] = day_info
    return cityweather

if __name__ == "__main__":
    # Load the city name -> link table once up front
    citynames = city_nameinfo()

    # Ask for a city and keep asking until the name is one we know
    cityname = str(input("please input you chiose cityname:"))
    while cityname not in citynames:
        print("您输入的不是城市名,请再次尝试输入!")
        cityname = str(input("please input you chiose cityname:"))

    # Build the forecast URL for the chosen city and fetch its weather
    cityurl = r"http://m.nmc.cn" + citynames[cityname]
    cityweather = weatherinfo(cityurl, cityname)

    # Ask for a date: an empty answer prints every available day, a known
    # date prints that day only, and anything else prompts again
    while True:
        date = input("please input you chiose date:(format:mm.dd):")
        if date == '':
            print(cityweather)
            break
        day_key = cityname + date
        if day_key in cityweather:
            print(cityweather[day_key])
            break
        print("您输入日期不在查询期间或者格式不对,请再次尝试输入!")
  • 0
    点赞
  • 11
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值