import urllib.request,re,datetime
def get_html(city, year, month):
    """Download one month of historical weather HTML from m.tianqi.com.

    Args:
        city: City identifier placed in the URL path (e.g. "haidian").
        year: Year component of the page name (e.g. "2019").
        month: Month component of the page name (e.g. "08").

    Returns:
        The response body decoded as UTF-8 text.

    Raises:
        urllib.error.URLError: If the request fails (HTTPError for HTTP
            error statuses).
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    url = "https://m.tianqi.com/lishi/%s/%s%s.html" % (city, year, month)
    request = urllib.request.Request(url)
    # Send a browser-like User-Agent instead of urllib's default one.
    request.add_header("User-Agent", "Mozilla/5.0")
    # The context manager closes the HTTP response (and its socket) as soon
    # as the body has been read, instead of leaving it to garbage collection.
    with urllib.request.urlopen(request) as response:
        return response.read().decode("UTF-8")
# print(get_html("haidian","2019","08"))
# Scrape every month of `year` for `city` and collect one entry per
# successfully parsed day into the parallel lists below.
dates, xq, tq, qw, highs, lows = [], [], [], [], [], []
city = "haidian"
year = "2019"
months = ["%02d" % i for i in range(1, 13)]
# Start the continuity check from the last day of the previous year so that
# January 1st counts as consecutive (derived from `year`, not hard-coded).
prev_day = datetime.datetime(int(year), 1, 1) - datetime.timedelta(days=1)

# Every pattern is matched against HTML whose whitespace has been removed,
# which is why tag names and attributes are fused (e.g. '<divclass=').
# They are compiled once here instead of once per loop iteration.
weatherbox_pattern = re.compile('<divclass="weatherbox">(.*?)</div><divclass="clearline1">')
day_pattern = re.compile('<dlclass="table_day15">(.*?)</dl>')
date_pattern = re.compile('<ddclass="date">(.*?)</dd>')
xq_pattern = re.compile('<i>(.*?)</i>')  # presumably the weekday; feeds `xq`
tq_pattern = re.compile('<ddclass="txt1">(.*?)</dd>')  # presumably the weather text; feeds `tq`
qw_pattern = re.compile('<ddclass="txt2">(.*?)</dd>')  # temperature range; feeds `qw`

for month in months:
    html = get_html(city, year, month)
    nospace_text = "".join(html.split())
    div_list = weatherbox_pattern.findall(nospace_text)
    if not div_list:
        # Unexpected page layout: report the month instead of failing with
        # an IndexError on div_list[0].
        print('%s年%s月未找到天气数据' % (year, month))
        continue
    dls = day_pattern.findall(div_list[0])
    for dl in dls:
        date_dd = date_pattern.findall(dl)
        if not date_dd:
            # A day entry without a date cell cannot be placed on the axis.
            print('%s年%s月有一条记录缺少日期' % (year, month))
            continue
        # First five characters of the date cell; presumably "MM/DD", given
        # the strptime format used below.
        d_str = year + "/" + date_dd[0][0:5]
        date_xq = xq_pattern.findall(dl)
        date_tq = tq_pattern.findall(dl)
        # Stringified match list with markup and unit removed; for a single
        # match this looks like "['-5~3']".
        date_qw = str(qw_pattern.findall(dl)).replace("<b>", "").replace("</b>", "").replace("℃", "")
        try:
            cur_day = datetime.datetime.strptime(d_str, '%Y/%m/%d')
        except ValueError:
            # Report the text that failed to parse; cur_day is not assigned
            # when strptime raises, so it must not be printed here.
            print(d_str, "数据错误")
            continue
        # [2:-2] strips the "['" and "']" left over from str(list).
        qws = date_qw[2:-2].split("~")
        try:
            # Parse both temperatures before touching any list so that a bad
            # value cannot leave the parallel lists with different lengths.
            low, high = int(qws[0]), int(qws[1])
        except (ValueError, IndexError):
            print(d_str, "气温数据错误")
            continue
        diff = cur_day - prev_day
        if diff != datetime.timedelta(days=1):
            print('在%s之前丢失数据' % d_str)
        dates.append(d_str)
        xq.append(date_xq)
        tq.append(date_tq)
        qw.append(date_qw)
        lows.append(low)
        highs.append(high)
        prev_day = cur_day
import pygal

# Plot the collected daily low/high temperatures as a line chart and write
# it to temp.svg.
bar = pygal.Line()
chart_options = {
    "title": "北京市海淀区%s年气温分析" % year,
    "x_title": "日期",
    "y_title": "气温(摄氏度)",
    "x_labels": dates,
    # Every 10th date is a major label; minor labels are switched off below.
    "x_labels_major": dates[::10],
    "show_minor_x_labels": False,
    "x_label_rotation": 45,
    "show_y_guides": True,
    "show_x_guides": False,
}
for option_name, option_value in chart_options.items():
    setattr(bar, option_name, option_value)
for series_title, series_values in (("最低气温", lows), ("最高气温", highs)):
    bar.add(series_title, series_values)
bar.render_to_file("temp.svg")
# Python weather data statistics (Python统计天气数据)
# Latest recommended article published on 2024-06-07 16:34:38