根据输入的城市和年月,爬取该地区在该时间段内的历史天气数据。其中使用xpinyin库的Pinyin类将输入的地名转换为拼音,用于拼接查询页面的地址。最后将获取到的天气数据打印出来,并保存到文本文件中。
代码:
# -*- coding: utf-8 -*-
"""Interactive scraper for monthly historical weather from lishi.tianqi.com.

The user types a city name and a year-month such as ``201101``.  The city
name is converted to pinyin with ``xpinyin`` because the page address is
built from that spelling.  The daily detail table, the wind-direction
statistics and the wind-force statistics of the month are printed, and the
daily detail table is additionally saved to a ``weather<N>.txt`` file in the
current directory.

The code runs unchanged on Python 2 and Python 3: every ``print`` call takes
a single argument, and the few names that moved between the two versions are
resolved once at import time below.
"""
import codecs
import os
import random
import sys
import urllib

try:
    import urllib2
except ImportError:
    # Python 3: the same objects live in urllib.request / urllib.error.
    from urllib.error import URLError as _URLError
    from urllib.request import Request as _Request
    from urllib.request import urlopen as _urlopen
else:
    _URLError = urllib2.URLError
    _Request = urllib2.Request
    _urlopen = urllib2.urlopen

from bs4 import BeautifulSoup
from xpinyin import Pinyin

try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3

_BASE_URL = 'http://lishi.tianqi.com/'
# Name the parser explicitly so the parse does not depend on which optional
# parser libraries happen to be installed (and bs4 does not warn about it).
_PARSER = 'html.parser'


def _to_text(value):
    """Return *value* as text, decoding it first if it is a byte string.

    UTF-8 is tried first; if that fails and stdin reports an encoding,
    that encoding is used instead.  Text input is returned untouched.
    """
    if not isinstance(value, bytes):
        return value
    try:
        return value.decode('utf-8')
    except UnicodeDecodeError:
        fallback = getattr(sys.stdin, 'encoding', None)
        if not fallback:
            raise
        return value.decode(fallback)


def _texts(nodes):
    """Return the stripped text of every node in *nodes*."""
    return [node.get_text().strip() for node in nodes]


def _first_text(nodes):
    """Return the stripped text of the first node, or u"" if there is none."""
    return nodes[0].get_text().strip() if nodes else u""


def _rows(cells, width):
    """Split *cells* into consecutive rows of *width* items (last may be short)."""
    return [cells[i:i + width] for i in range(0, len(cells), width)]


def _new_output_path():
    """Pick an unused ``weather<N>.txt`` name with N in 1..1000, in random order.

    Raises:
        IOError: if all 1000 candidate names already exist.
    """
    numbers = list(range(1, 1001))
    random.shuffle(numbers)
    for number in numbers:
        path = 'weather' + str(number) + '.txt'
        if not os.path.exists(path):
            return path
    raise IOError("no unused weather<N>.txt name left in the current directory")


class Weather:
    """Downloads and prints one month of historical weather for one city."""

    def __init__(self):
        self.user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'
        self.headers = {'User-Agent': self.user_agent}
        # Not read anywhere in this script; kept so existing users of the
        # attribute keep working.
        self.contens = []

    def _download(self, url, failure_message):
        """Fetch *url* and return the raw body, or None after printing the error.

        URLError is caught (HTTPError is a subclass of it), so connection
        problems are reported the same way as HTTP status errors.
        """
        request = _Request(url, headers=self.headers)
        try:
            response = _urlopen(request)
            try:
                return response.read()
            finally:
                response.close()
        except _URLError as error:
            # HTTPError carries a status code; a plain URLError only a reason.
            detail = getattr(error, 'code', None)
            if detail is None:
                detail = getattr(error, 'reason', error)
            print(u"%s %s" % (failure_message, detail))
            return None

    def weatherRange(self, Location):
        """Return the ``span.f_12`` nodes of the city's index page.

        The caller shows the text of the first node as the period for which
        history can be queried.

        Args:
            Location: pinyin spelling of the city, used as a URL path segment.

        Returns:
            The list of matching nodes (possibly empty), or None if the page
            could not be downloaded.
        """
        page = self._download(_BASE_URL + Location + '/index.html',
                              u"获取历史天气查询范围失败,错误原因:")
        if page is None:
            return None
        return BeautifulSoup(page, _PARSER).select('span.f_12')

    def getPageCode(self, date_, Location):
        """Return the raw HTML of the page for month *date_*, or None on failure.

        Args:
            date_: year and month as a string such as ``'201101'``.
            Location: pinyin spelling of the city.
        """
        url = _BASE_URL + Location + '/' + date_ + '.html'
        return self._download(url, u"连接天气网失败,错误原因:")

    def getPageInfo(self, date_, Location):
        """Parse one month's page into its three statistics sections.

        Returns:
            A 6-tuple ``(detail_titles, detail_cells, wind_direction_titles,
            wind_direction_cells, wind_force_titles, wind_force_cells)`` of
            node lists, or None (after printing a message) when the page
            could not be loaded or does not contain the expected sections.
        """
        page = self.getPageCode(date_, Location)
        if not page:
            print("页面加载失败...")
            return None
        sections = BeautifulSoup(page, _PARSER).select('div#tool_site')
        # Sections 1, 2 and 3 hold daily details, wind direction and wind
        # force; guard the indexing so an unexpected layout is reported
        # instead of raising IndexError.
        if len(sections) < 4:
            print("页面加载失败...")
            return None
        parsed = []
        for section in sections[1:4]:
            parsed.append(section.select('div.box-hd'))  # section title
            parsed.append(section.select('ul li'))       # section cells
        return tuple(parsed)

    def loadInfo(self, date_, Location):
        """Print one month's weather and save the daily detail table.

        The detail table is printed six cells per row and written, one row
        per line with cells separated by a space, to a new UTF-8 file named
        ``weather<N>.txt``.  Wind direction (four cells per row) and wind
        force (single line) are printed only.

        Raises:
            IOError: if no unused output file name is available.
        """
        info = self.getPageInfo(date_, Location)
        if info is None:
            # getPageInfo has already told the user what went wrong.
            return
        (detail_titles, detail_cells, direction_titles,
         direction_cells, force_titles, force_cells) = info
        if not detail_cells:
            print("页面加载失败...")
            return

        print(u"\n正在获取%s月的天气数据...\n" % date_)
        print(u"\n正在打印%s月的天气数据...\n" % date_)

        # Daily details: printed and saved.
        title = _first_text(detail_titles)
        with codecs.open(_new_output_path(), 'w', 'utf-8') as out:
            print(title)
            out.write(title + '\r\n')
            for row in _rows(_texts(detail_cells), 6):
                line = u" ".join(row)
                print(line + u"\n")
                out.write(line + '\r\n')

        # Wind direction statistics: printed only.
        print(u"\n" + _first_text(direction_titles))
        for row in _rows(_texts(direction_cells), 4):
            print(u" ".join(row) + u"\n")

        # Wind force statistics: printed only, all on one line.
        print(u"\n" + _first_text(force_titles))
        print(u" ".join(_texts(force_cells)))

    def start(self, date_, Location):
        """Show the requested month, then keep serving queries until declined.

        After each report the user is asked whether to continue; entering
        ``1`` starts a new city/month query, anything else ends the loop.
        """
        self.loadInfo(date_, Location)
        while True:
            answer = _read_line("\n是否继续查询,是输入1,否输入0\n")
            if answer.strip() != str(1):
                break
            date_, Location = _ask_query(self, "\n请输入查询历史天气的城市名:\n")
            self.loadInfo(date_, Location)


def _ask_query(spider, city_prompt):
    """Prompt for a city and a month; return ``(date, pinyin_location)``.

    The queryable period reported by the site is shown before the month is
    asked for.  If it cannot be determined, a notice is printed and the
    month is asked for anyway.
    """
    city = _to_text(_read_line(city_prompt)).strip()
    location = Pinyin().get_pinyin(city, "")
    range_ = spider.weatherRange(str(location))
    if range_:
        print(u"\n%s可供查询历史天气的时间范围是%s\n"
              % (city, range_[0].get_text().strip()))
    else:
        print(u"\n未能获取%s的历史天气查询范围\n" % city)
    date_ = _read_line("\n请输入查询年月(如201101):\n").strip()
    return str(date_), str(location)


def main():
    """Run the interactive session: first query, then the continue loop."""
    spider = Weather()
    date_, location = _ask_query(spider, "请输入查询历史天气的城市名:\n")
    spider.start(date_, location)


if __name__ == '__main__':
    main()