不多说了,直接上代码,详情看注释。
import re
import requests
import csv
import codecs
import datetime
class Weather:
    """Scrape the 3-hourly, 7-day forecast of one city from weather.com.cn.

    Typical use: build an instance with a city ID, call ``get_html_text()``
    to download the forecast page, then ``get_goal()`` to extract the
    forecast records and append them to ``weather_7day.csv`` in the current
    working directory.
    """

    # Seconds to wait for the HTTP response before giving up, so that a
    # stalled connection cannot block the script forever.
    REQUEST_TIMEOUT = 10
    # Output file; rows are appended so repeated runs accumulate.
    CSV_PATH = 'weather_7day.csv'
    # Column titles, written only once at the top of a new/empty CSV file.
    CSV_HEADER = ['时间', 'num', '天气', '温度', '风向', '风力级别', 'num',
                  '采集时间', '城市ID']
    # Captures the value of the "7d" key of the page's `hour3data` object
    # (everything up to the first closing brace).
    _SEVEN_DAY_RE = re.compile(r'var hour3data=\{.*?"7d":(.*?)\}')
    # Captures the text between each pair of double quotes (one record).
    _RECORD_RE = re.compile(r'"(.*?)"')

    def __init__(self, city_id):
        """Store the city ID and derive the forecast page URL from it.

        Args:
            city_id: City identifier inserted into the page URL and written
                as the last column of every CSV row.
        """
        self.city_id = city_id
        self.url = 'http://www.weather.com.cn/weather/{}.shtml'.format(self.city_id)
        self.header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}

    def get_html_text(self):
        """Request the forecast page and keep its source in ``self.html``.

        The response is decoded as UTF-8. The request gives up after
        ``REQUEST_TIMEOUT`` seconds instead of waiting indefinitely.
        """
        response = requests.get(self.url, headers=self.header,
                                timeout=self.REQUEST_TIMEOUT)
        response.encoding = 'utf-8'
        self.html = response.text

    def rinse(self, string):
        """Return ``string`` with every square bracket removed."""
        return string.replace('[', '').replace(']', '')

    def save_csv(self, data):
        """Append one row to the CSV file.

        Args:
            data: Sequence of cell values forming a single row.
        """
        self._append_rows([data])

    def _append_rows(self, rows, header=None):
        """Append ``rows`` to the CSV file, opening the file only once.

        Args:
            rows: Iterable of rows (each a sequence of cell values).
            header: Optional title row. It is written only when the file is
                still empty, so repeated runs do not scatter header lines
                among the data.
        """
        # newline='' lets the csv module control line endings itself.
        with open(self.CSV_PATH, 'a', encoding='utf-8', newline='') as csv_file:
            writer = csv.writer(csv_file)
            # In append mode the position starts at the end of the file, so
            # a position of 0 means the file has no content yet.
            if header is not None and csv_file.tell() == 0:
                writer.writerow(header)
            writer.writerows(rows)

    def get_goal(self):
        """Extract the 7-day records from ``self.html`` and store them.

        Each record is split on commas into its fields; the collection time
        and the city ID are appended as two extra columns. ``get_html_text``
        must have been called first so that ``self.html`` exists.

        Raises:
            IndexError: If the page source contains no ``hour3data`` object
                with a ``"7d"`` entry.
        """
        found = self._SEVEN_DAY_RE.search(self.html)
        if found is None:
            # Same exception type as the former `findall(...)[0]` lookup,
            # but with a message that says what is actually wrong.
            raise IndexError('no "7d" forecast data found in page source')
        # Drop the nesting brackets, then pull out every quoted record.
        records = self._RECORD_RE.findall(self.rinse(found.group(1)))
        # One timestamp per scrape: all rows share the same collection time.
        collected_at = datetime.datetime.now()
        rows = [record.split(',') + [collected_at, self.city_id]
                for record in records]
        self._append_rows(rows, header=self.CSV_HEADER)
"""
在网页上要获取的数据是这个样子的>>>
var hour3data={"1d":["19日20时,n01,多云,18℃,西南风,3-4级,0","19日23时,n01,多云,14℃,西南风,5-6级,0","20日02时,n01,多云,14℃,西南风,5-6级,0","20日05时,n01,多云,18℃,西南风,5-6级,0","20日08时,d01,多云,22℃,西南风,5-6级,3","20日11时,d01,多云,28℃,西南风,4-5级,3","20日14时,d01,多云,31℃,西南风,5-6级,3","20日17时,d01,多云,30℃,西南风,5-6级,3","20日20时,n01,多云,23℃,西南风,3-4级,0"],"23d":[["21日08时,d07,小雨,16℃,东北风,4-5级,3","21日11时,d07,小雨,15℃,东北风,4-5级,3","21日14时,d07,小雨,9℃,东北风,4-5级,3","21日17时,d07,小雨,15℃,东北风,3-4级,3","21日20时,n08,中雨,10℃,东北风,3-4级,0","21日23时,n01,多云,7℃,东北风,<3级,0","22日02时,n01,多云,6℃,东北风,<3级,0","22日05时,n01,多云,6℃,东北风,3-4级,0"],["22日08时,d01,多云,12℃,东北风,3-4级,3","22日11时,d01,多云,15℃,东北风,3-4级,3","22日14时,d01,多云,16℃,东北风,3-4级,3","22日17时,d01,多云,16℃,东北风,3-4级,2","22日20时,n01,多云,15℃,东北风,<3级,0","23日02时,n01,多云,6℃,东北风,3-4级,0"]],"7d":[["19日20时,n01,多云,18℃,西南风,3-4级,0","19日23时,n01,多云,14℃,西南风,5-6级,0","20日02时,n01,多云,14℃,西南风,5-6级,0","20日05时,n01,多云,18℃,西南风,5-6级,0"],["20日08时,d01,多云,22℃,西南风,5-6级,3","20日11时,d01,多云,28℃,西南风,4-5级,3","20日14时,d01,多云,31℃,西南风,5-6级,3","20日17时,d01,多云,30℃,西南风,5-6级,3","20日20时,n01,多云,23℃,西南风,3-4级,0","20日23时,n01,多云,20℃,东北风,<3级,0","21日02时,n02,阴,18℃,东北风,3-4级,0","21日05时,n02,阴,16℃,东北风,4-5级,0"],["21日08时,d07,小雨,16℃,东北风,4-5级,3","21日11时,d07,小雨,15℃,东北风,4-5级,3","21日14时,d07,小雨,9℃,东北风,4-5级,3","21日17时,d07,小雨,15℃,东北风,3-4级,3","21日20时,n08,中雨,10℃,东北风,3-4级,0","21日23时,n01,多云,7℃,东北风,<3级,0","22日02时,n01,多云,6℃,东北风,<3级,0","22日05时,n01,多云,6℃,东北风,3-4级,0"],["22日08时,d01,多云,12℃,东北风,3-4级,3","22日11时,d01,多云,15℃,东北风,3-4级,3","22日14时,d01,多云,16℃,东北风,3-4级,3","22日17时,d01,多云,16℃,东北风,3-4级,2","22日20时,n01,多云,15℃,东北风,<3级,0","23日02时,n01,多云,6℃,东北风,3-4级,0"],["23日08时,d01,多云,11℃,东北风,3-4级,2","23日14时,d01,多云,16℃,东北风,3-4级,2","23日20时,n01,多云,10℃,东北风,<3级,0","24日02时,n00,晴,6℃,北风,3-4级,0"],["24日08时,d00,晴,10℃,北风,3-4级,1","24日14时,d00,晴,20℃,北风,3-4级,1","24日20时,n00,晴,16℃,北风,<3级,0","25日02时,n00,晴,8℃,西北风,3-4级,0"],["25日08时,d00,晴,13℃,西北风,3-4级,1","25日14时,d00,晴,20℃,西北风,3-4级,1","25日20时,n00,晴,16℃,西北风,<3级,0","26日02时,n01,多云,9℃,南风,3-4级,0"],["26日08时,d01,多云,16℃,南风,3-4级,3","26日14时,d01,多云,24℃,南风,3-4级,2","26日20时,n01,多云,19℃,南风,3-4级,0"]]}
"""
if __name__ == '__main__':
    # City ID 101010100 is used here. Given enough city/town IDs, this could
    # become a loop that collects the weather data of several cities.
    scraper = Weather(city_id=101010100)
    scraper.get_html_text()
    scraper.get_goal()
"""
保存到本地是这个样子的>>>
时间,num,天气,温度,风向,风力级别,num,采集时间,城市ID
20日08时,d00,晴,18℃,南风,<3级,4,2018-04-20 08:07:01.028206,101010100
20日11时,d01,多云,23℃,南风,<3级,4,2018-04-20 08:07:01.029206,101010100
20日14时,d01,多云,28℃,南风,4-5级,4,2018-04-20 08:07:01.033226,101010100
20日17时,d01,多云,26℃,南风,4-5级,2,2018-04-20 08:07:01.036240,101010100
20日20时,n01,多云,23℃,南风,3-4级,0,2018-04-20 08:07:01.039248,101010100
20日23时,n01,多云,21℃,南风,3-4级,0,2018-04-20 08:07:01.042258,101010100
21日02时,n02,阴,19℃,南风,<3级,0,2018-04-20 08:07:01.045264,101010100
21日05时,n07,小雨,15℃,南风,<3级,0,2018-04-20 08:07:01.047778,101010100
21日08时,d07,小雨,14℃,南风,3-4级,3,2018-04-20 08:07:01.053790,101010100
...
"""
打开CSV文件。
想要获取更多精彩内容,请加V信公众号:Python练习册 或 加小编V信:AA_FMspider。欢迎分享转载文章。