Python爬虫练习——爬取某城市历史气象数据(待优化)

# -*- coding=utf-8 -*-
from __future__ import print_function  
import urllib.request  
from bs4 import BeautifulSoup  
  
# Download one year of daily weather history from lishi.tianqi.com, month by
# month, and store it in a CSV file with one line per day.
strYear = '2013'
# Single source of truth for the city. Previously the output file was named
# 'zhengzhou...' while the URL fetched the 'beijing' pages, so the CSV was
# mislabelled; the file name and the URL are now both built from this value.
# NOTE(review): 'zhengzhou' is taken from the original file name - confirm
# that this is the intended city.
strCity = 'zhengzhou'
strFile = strCity + strYear + '.csv'

# `with` guarantees the file is closed even if a request or a parse step
# raises. The encoding is explicit so the Chinese text is written the same
# way on every platform instead of depending on the locale default.
with open(strFile, 'w', encoding='utf-8') as f:
    for month in range(1, 13):
        # Pages are addressed as YYYYMM, so the month is zero-padded.
        strYearMonth = strYear + '{:02d}'.format(month)
        print("\nGetting data for month" + strYearMonth + "...", end='')

        url = ("http://lishi.tianqi.com/" + strCity + "/"
               + strYearMonth + ".html")
        try:
            # The response is a context manager; closing it releases the
            # connection as soon as the page has been parsed.
            with urllib.request.urlopen(url) as page:
                soup = BeautifulSoup(page, "html.parser")
        except OSError:
            # URLError / HTTPError (and socket errors) are OSError
            # subclasses: one unreachable month should not abort the run.
            print("fail to get the page", end='')
            continue

        # The daily records live in the element with class "tqtongji2".
        weatherSet = soup.find(attrs={"class": "tqtongji2"})
        if weatherSet is None:
            print("fail to get the page", end='')
            continue

        for line in weatherSet.contents:
            # Text nodes (NavigableString and its subclasses such as
            # Comment) are str subclasses and have no <li> children.
            if isinstance(line, str):
                continue
            # Rows carrying attributes are skipped, as in the original
            # (presumably the styled header row - verify against the page).
            if line.attrs:
                continue
            lis = line.find_all('li')
            # A data row needs six cells; skip anything shorter instead of
            # failing with IndexError.
            if len(lis) < 6:
                continue
            (strDate, highWeather, lowWeather,
             weather, windDirection, windPower) = (li.text for li in lis[:6])
            # Column order is kept from the original script: the low
            # temperature is written before the high temperature.
            f.write(','.join((strDate, lowWeather, highWeather, weather,
                              windDirection, windPower)) + '\n')
        print("done", end='')

参考资料:http://cuiqingcai.com/1319.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值