爬取中国天气网,并做出可视化界面
发现码字真的也是一门技术活:刚开始还可以码码字,现在根本码不动了。这里就随便记录一下自己的学习过程吧,不定期更新——毕竟还是要毕业的,没有很多时间一直学这个。
import requests
from lxml import etree
from pyecharts.charts import Bar
from pyecharts import options as opt
# Module-level accumulator: parse_page() appends one
# {'city': <str>, 'temp': <int>} record per parsed table row.
BASE_INFO = []


def parse_page(html):
    """Extract city/temperature rows from one parsed page into BASE_INFO.

    Args:
        html: A parsed document exposing ``xpath()``, or ``None`` when the
            page could not be downloaded.

    Returns:
        None. Records are appended to the module-level ``BASE_INFO`` list,
        and every record collected so far is printed.

    Rows that have no city link, fewer than two cells, or a temperature
    cell that is not an integer are skipped instead of aborting the run.
    """
    if html is None:
        return
    containers = html.xpath("//div[@class='conMidtab']")
    if not containers:
        return
    # Only the first 'conMidtab' container is read.
    # NOTE(review): presumably the first one is today's forecast -- confirm.
    for table in containers[0].xpath(".//div[@class='conMidtab2']"):
        # The first two <tr> of each table are dropped (presumably headers).
        rows = table.xpath(".//tr")[2:]
        for index, row in enumerate(rows):
            # The first data row keeps its city link in the second cell;
            # every following row keeps it in the first cell.
            if index == 0:
                names = row.xpath(".//td[2]/a/text()")
            else:
                names = row.xpath(".//td[1]/a/text()")
            cells = row.xpath(".//td")
            if not names or len(cells) < 2:
                continue
            # The temperature is read from the second-to-last cell.
            texts = cells[-2].xpath("./text()")
            try:
                temp = int(texts[0])
            except (IndexError, ValueError):
                continue
            # Store the name itself, not the one-element xpath result list.
            BASE_INFO.append({'city': str(names[0]), 'temp': temp})
    for value in BASE_INFO:
        print(value)
def get_html(url, timeout=10):
    """Download *url* and return it parsed as an lxml HTML tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled server raises an error instead of
            blocking the script forever.

    Returns:
        The tree built by ``etree.HTML`` when the response status is 200.
        For any other status the code is printed and ``None`` is returned.
    """
    headers = {
        'User-Agent':'Mozilla/5.0(Windows NT 10.0;Win64;x64) AppleWebKit/537.36(KHTML,likeGecko) Chrome/76.0.3809.132Safari/537.36'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print(response.status_code)
        return None
    # The body is decoded explicitly as UTF-8 before parsing.
    text = response.content.decode('utf-8')
    return etree.HTML(text)
def main():
    """Scrape every regional page and return the ten highest temperatures.

    Each URL is fetched with ``get_html`` and handed to ``parse_page``,
    which fills the module-level ``BASE_INFO`` list. The list is then
    sorted in place by temperature, highest first, and printed.

    Returns:
        A ``(cities, temps)`` tuple of two parallel lists holding the
        'city' and 'temp' values of the first ten sorted records.
    """
    urls = ['http://www.weather.com.cn/textFC/hb.shtml','http://www.weather.com.cn/textFC/db.shtml',
            'http://www.weather.com.cn/textFC/hz.shtml','http://www.weather.com.cn/textFC/hd.shtml',
            'http://www.weather.com.cn/textFC/hn.shtml','http://www.weather.com.cn/textFC/xb.shtml',
            'http://www.weather.com.cn/textFC/xn.shtml']
    for url in urls:
        html = get_html(url)
        if html is None:
            # get_html returns None for a non-200 response; skip that page
            # so one failed download does not abort the whole run.
            continue
        parse_page(html)
    BASE_INFO.sort(key=lambda record: record['temp'], reverse=True)
    print(BASE_INFO)
    # Slice once, then split the ten leading records into the two axes.
    top = BASE_INFO[:10]
    cities = [record['city'] for record in top]
    temps = [record['temp'] for record in top]
    return cities, temps
if __name__ == '__main__':
    # Collect the ranking, then draw it as a bar chart and save it as HTML.
    cities, temps = main()
    bar = Bar()
    bar.add_xaxis(cities)      # category axis: city entries
    bar.add_yaxis('', temps)   # single unnamed series: temperatures
    bar.set_global_opts(title_opts=opt.TitleOpts(title="气温排行版"))
    # The original also kept bar.render_notebook() as a commented-out
    # alternative to rendering to a file.
    bar.render('F:\\气温.html')