Python--weather.No.2--csv的实操和正则表达式

 第二次尝试:

这次依然选用天气预报为例子(数据多)

但换成了国内的,API似乎有点简单

解析网页 以及 正则表达式 那里卡了很久

在看源码的过程中,我还发现了网页广告是如何插入网页的:其实就是一堆链接……

import re
import csv
import time
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.support.select import Select

# Module-level accumulator filled by get_parseText(): one
# [date, weather, temperature, wind, wind_level] row per forecast day.
weather_list = []

def get_url(city_code='101270101'):
    """Build the forecast page URL on www.weather.com.cn for one city.

    Args:
        city_code: Code embedded in the page file name. Defaults to
            '101270101', the value this script originally hard-coded, so
            calling get_url() with no argument returns the same URL as before.

    Returns:
        The page URL as a string.
    """
    # Alternative page noted by the author for the same code:
    # www.weather.com.cn/weather1d/101270101.shtml#input
    return 'http://www.weather.com.cn/weather/{}.shtml'.format(city_code)
def get_urlText(url):
    """Download a page and return its decoded text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as a string, or None when the request fails
        (in which case 'error 1' is printed).
    """
    # Send a browser-like user agent instead of the requests default.
    kv = {'user-agent': 'Mozilla/5.0'}
    try:
        # Without a timeout requests waits indefinitely on a stalled server.
        r = requests.get(url, headers=kv, timeout=10)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are expected here; any other exception
        # is a programming error and is allowed to propagate.
        print('error 1')
        return None
    # Decode using the encoding guessed from the content itself.
    r.encoding = r.apparent_encoding
    return r.text
def get_parseText(parse_url):
    """Parse forecast page HTML and append one row per day to weather_list.

    Each appended row is [date, weather, temperature, wind, wind_level].

    Args:
        parse_url: HTML text of the forecast page (despite the name this is
            page content, not a URL). None is accepted so the result of a
            failed download can be passed straight through.

    Returns:
        None. Rows are appended to the module-level weather_list. When the
        input is None or the page lacks an expected tag, 'error 2' is
        printed; rows appended before the failure stay in the list.
    """
    global weather_list
    if parse_url is None:
        print('error 2')
        return
    # (?<= title=") is a lookbehind anchoring right after ' title="';
    # .*? lazily consumes characters; (?=") stops before the closing quote.
    title_pattern = re.compile(r'(?<= title=").*?(?=")')
    try:
        soup = BeautifulSoup(parse_url, 'html.parser')
        days = soup.find('ul', 't clearfix').find_all('li')
        for elem in days:
            date = elem.find('h1').get_text()
            weather = elem.find('p', 'wea').get_text()
            # Only the <i> element inside <p class="tem"> is read.
            temperature = elem.find('p', 'tem').find('i').get_text()
            wind_tag = elem.find('p', 'win')
            # Collect every title="..." value found inside the <em> block.
            win = title_pattern.findall(str(wind_tag.find('em')))
            wind = '-'.join(win)
            wind_lev = wind_tag.find('i').get_text()
            weather_list.append([date, weather, temperature, wind, wind_lev])
    except (AttributeError, TypeError):
        # find() returns None when a tag is missing, so a changed page layout
        # surfaces as AttributeError; TypeError covers non-text input.
        print('error 2')
        return


def prints(weather_list):
    """Write the forecast rows to the text file 'weatherlist' as a table.

    The file is created (or overwritten) in the current working directory,
    encoded as UTF-8. The first line holds the column titles; every
    following line holds one row.

    Args:
        weather_list: Iterable of rows; each row must provide at least five
            items (date, weather, temperature, wind, wind level).

    Returns:
        None.
    """
    # Argument 5 is the fill character. Each column is centered to width 10
    # and padded with the full-width space (U+3000) so that Chinese text lines
    # up; the original template passed this argument but never referenced it.
    tplt = '{0:{5}^10}\t{1:{5}^10}\t{2:{5}^10}\t{3:{5}^10}\t{4:{5}^10}'
    fill = chr(12288)
    # The context manager closes the file even if a write raises.
    with open('weatherlist', 'w', encoding='utf8') as f:
        f.write(tplt.format('日期', '天气', '温度', '风向', '风级', fill))
        f.write('\n')
        for elem in weather_list:
            f.write(tplt.format(elem[0], elem[1], elem[2], elem[3], elem[4], fill))
            f.write('\n')

def main():
    """Fetch the forecast page, parse it into weather_list and write it out."""
    url = get_url()
    parse_text = get_urlText(url)
    get_parseText(parse_text)
    prints(weather_list)


# Run only when executed as a script, so that importing this module does not
# trigger a download and a file write.
if __name__ == '__main__':
    main()



 以下为源码
(不得不说,源码有500多行,可能是我还不太熟练,找了很久)

每一天的天气对应 <ul class="t clearfix"> 下的一个 <li class="sky skyid ..."> 元素,例如 <li class="sky skyid lv2 on">

<ul class="t clearfix">
<li class="sky skyid lv2 on">
<h1>4日(今天)</h1>
<big class="png40 d01"></big>
<big class="png40 n07"></big>
<p title="多云转小雨" class="wea">多云转小雨</p>
<p class="tem">
<span>16</span>/<i>9℃</i>
</p>
<p class="win">
<em>
<span title="无持续风向" class="NNW"></span>
<span title="无持续风向" class="NNW"></span>
</em>
<i><3级</i>
</p>
<div class="slid"></div>
</li>
<li class="sky skyid lv3">
<h1>5日(明天)</h1>
<big class="png40 d07"></big>
<big class="png40 n07"></big>
<p title="小雨" class="wea">小雨</p>
<p class="tem">
<span>15</span>/<i>9℃</i>
</p>
<p class="win">
<em>
<span title="无持续风向" class="NNW"></span>
<span title="无持续风向" class="NNW"></span>
</em>
<i><3级</i>
</p>
<div class="slid"></div>
</li>
<li class="sky skyid lv2">
<h1>6日(后天)</h1>
<big class="png40 d01"></big>
<big class="png40 n01"></big>
<p title="多云" class="wea">多云</p>
<p class="tem">
<span>15</span>/<i>9℃</i>
</p>
<p class="win">
<em>
<span title="无持续风向" class="NNW"></span>
<span title="无持续风向" class="NNW"></span>
</em>
<i><3级</i>
</p>
<div class="slid"></div>
</li>
<li class="sky skyid lv2">
<h1>7日(周六)</h1>
<big class="png40 d01"></big>
<big class="png40 n07"></big>
<p title="多云转小雨" class="wea">多云转小雨</p>
<p class="tem">
<span>17</span>/<i>11℃</i>
</p>
<p class="win">
<em>
<span title="无持续风向" class="NNW"></span>
<span title="无持续风向" class="NNW"></span>
</em>
<i><3级</i>
</p>
<div class="slid"></div>
</li>
<li class="sky skyid lv3">
<h1>8日(周日)</h1>
<big class="png40 d07"></big>
<big class="png40 n07"></big>
<p title="小雨" class="wea">小雨</p>
<p class="tem">
<span>15</span>/<i>9℃</i>
</p>
<p class="win">
<em>
<span title="无持续风向" class="NNW"></span>
<span title="无持续风向" class="NNW"></span>
</em>
<i><3级</i>
</p>
<div class="slid"></div>
</li>
<li class="sky skyid lv2">
<h1>9日(周一)</h1>
<big class="png40 d01"></big>
<big class="png40 n01"></big>
<p title="多云" class="wea">多云</p>
<p class="tem">
<span>14</span>/<i>6℃</i>
</p>
<p class="win">
<em>
<span title="无持续风向" class="NNW"></span>
<span title="无持续风向" class="NNW"></span>
</em>
<i><3级</i>
</p>
<div class="slid"></div>
</li>
<li class="sky skyid lv2">
<h1>10日(周二)</h1>
<big class="png40 d01"></big>
<big class="png40 n00"></big>
<p title="多云转晴" class="wea">多云转晴</p>
<p class="tem">
<span>15</span>/<i>3℃</i>
</p>
<p class="win">
<em>
<span title="无持续风向" class="NNW"></span>
<span title="无持续风向" class="NNW"></span>
</em>
<i><3级</i>
</p>
<div class="slid"></div>
</li>
</ul>

准备改进 :

用csv输入输出

将中国各个城市的代码写入文件中。

import re
import csv
import time
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.support.select import Select

# Module-level accumulator filled by get_parseText(): one
# [date, weather, temperature, wind, wind_level] row per forecast day.
weather_list = []

def get_url(city_code='101270101'):
    """Build the forecast page URL on www.weather.com.cn for one city.

    Args:
        city_code: Code embedded in the page file name. Defaults to
            '101270101', the value this script originally hard-coded, so
            calling get_url() with no argument returns the same URL as before.

    Returns:
        The page URL as a string.
    """
    # Alternative page noted by the author for the same code:
    # www.weather.com.cn/weather1d/101270101.shtml#input
    return 'http://www.weather.com.cn/weather/{}.shtml'.format(city_code)
def get_urlText(url):
    """Download a page and return its decoded text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as a string, or None when the request fails
        (in which case 'error 1' is printed).
    """
    # Send a browser-like user agent instead of the requests default.
    kv = {'user-agent': 'Mozilla/5.0'}
    try:
        # Without a timeout requests waits indefinitely on a stalled server.
        r = requests.get(url, headers=kv, timeout=10)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are expected here; any other exception
        # is a programming error and is allowed to propagate.
        print('error 1')
        return None
    # Decode using the encoding guessed from the content itself.
    r.encoding = r.apparent_encoding
    return r.text
def get_parseText(parse_url):
    """Parse forecast page HTML and append one row per day to weather_list.

    Each appended row is [date, weather, temperature, wind, wind_level].

    Args:
        parse_url: HTML text of the forecast page (despite the name this is
            page content, not a URL). None is accepted so the result of a
            failed download can be passed straight through.

    Returns:
        None. Rows are appended to the module-level weather_list. When the
        input is None or the page lacks an expected tag, 'error 2' is
        printed; rows appended before the failure stay in the list.
    """
    global weather_list
    if parse_url is None:
        print('error 2')
        return
    # (?<= title=") is a lookbehind anchoring right after ' title="';
    # .*? lazily consumes characters; (?=") stops before the closing quote.
    title_pattern = re.compile(r'(?<= title=").*?(?=")')
    try:
        soup = BeautifulSoup(parse_url, 'html.parser')
        days = soup.find('ul', 't clearfix').find_all('li')
        for elem in days:
            date = elem.find('h1').get_text()
            weather = elem.find('p', 'wea').get_text()
            # Only the <i> element inside <p class="tem"> is read.
            temperature = elem.find('p', 'tem').find('i').get_text()
            wind_tag = elem.find('p', 'win')
            # Collect every title="..." value found inside the <em> block.
            win = title_pattern.findall(str(wind_tag.find('em')))
            wind = '-'.join(win)
            wind_lev = wind_tag.find('i').get_text()
            weather_list.append([date, weather, temperature, wind, wind_lev])
    except (AttributeError, TypeError):
        # find() returns None when a tag is missing, so a changed page layout
        # surfaces as AttributeError; TypeError covers non-text input.
        print('error 2')
        return


def prints(weather_list):
    """Write the forecast rows to 'weather.csv' with a header line.

    The file is created (or overwritten) in the current working directory,
    encoded as UTF-8.

    Args:
        weather_list: Iterable of rows; each row is written as one CSV record
            after the title record.

    Returns:
        None.
    """
    titles = ['日期', '天气', '温度', '风向', '风级']
    # newline='' hands line-ending control to the csv module; without it an
    # extra blank line appears after every record on Windows.
    with open('weather.csv', 'w', encoding='utf8', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(titles)
        f_csv.writerows(weather_list)

def main():
    """Fetch the forecast page, parse it into weather_list and write it out."""
    url = get_url()
    parse_text = get_urlText(url)
    get_parseText(parse_text)
    prints(weather_list)


# Run only when executed as a script, so that importing this module does not
# trigger a download and a file write.
if __name__ == '__main__':
    main()



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值