1. enumerate()方法可以同时拿到index和value。
2. python匿名函数,lambda表达式,可以简化代码。详见2.2可视化过程。
3. 在Python 3中,map()函数返回的是map对象(迭代器),需要用list()转换成list类型;Python 2中map()直接返回list。
示例代码如下:
# coding:utf-8
import requests
from bs4 import BeautifulSoup
import json
from pyecharts import Bar
# Shared accumulator: parse_page() appends one
# {"city": ..., "Lowest_temperature": ...} dict per table row, and main()
# sorts and slices this list for charting.
ALL_DATA = []
def parse_page(url, timeout=10):
    """Scrape one forecast page and append its rows to ``ALL_DATA``.

    Downloads ``url``, parses the HTML with the html5lib parser, and for
    every data row of every table inside the first ``<div class="conMidtab">``
    appends ``{"city": <str>, "Lowest_temperature": <int>}`` to the
    module-level ``ALL_DATA`` list.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the server before ``requests`` gives up.
            ``requests`` applies no timeout by default, so without this a
            stalled connection would hang the script indefinitely.

    Returns:
        None. Results are accumulated in ``ALL_DATA``.
    """
    # 1. Fetch the page with a browser-like User-Agent.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.79 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    text = response.content

    # 2. Parse the HTML with BeautifulSoup.
    # The lxml parser has only average tolerance for malformed markup;
    # html5lib behaves like a browser's own parser and is far more forgiving.
    # Install it with: pip install html5lib  (pip list shows installed packages)
    soup = BeautifulSoup(text, "html5lib")
    container = soup.find("div", class_="conMidtab")
    tables = container.find_all("table")
    for table in tables:
        # Skip the first two rows of each table (presumably header rows).
        rows = table.find_all("tr")[2:]
        # enumerate() yields the row index together with the row itself.
        for index, row in enumerate(rows):
            cells = row.find_all("td")
            # In the first data row the city name sits in the second cell
            # (presumably the first cell holds the province name); in all
            # later rows it is the first cell.
            city_cell = cells[1] if index == 0 else cells[0]
            city = list(city_cell.stripped_strings)[0]
            # The lowest temperature is read from the second-to-last cell.
            temperature = list(cells[-2].stripped_strings)[0]
            record = {"city": city, "Lowest_temperature": int(temperature)}
            ALL_DATA.append(record)
            # Debugging tip: print(json.dumps(record, ensure_ascii=False))
            # shows the Chinese city names readably instead of escaped.
def main():
    """Scrape all listed forecast pages and chart the ten lowest temperatures.

    Calls ``parse_page`` for each URL so that ``ALL_DATA`` is filled, sorts
    ``ALL_DATA`` in place by ascending ``"Lowest_temperature"``, takes the
    first ten entries, renders them as a pyecharts bar chart to
    ``china_temperature.html`` and prints ``Successful``.
    """
    # 1. Collect the data.
    urls = [
        "http://www.weather.com.cn/textFC/hb.shtml",
        "http://www.weather.com.cn/textFC/db.shtml",
        "http://www.weather.com.cn/textFC/hd.shtml",
        "http://www.weather.com.cn/textFC/hz.shtml",
        "http://www.weather.com.cn/textFC/hn.shtml",
        "http://www.weather.com.cn/textFC/xb.shtml",
        "http://www.weather.com.cn/textFC/xn.shtml",
        "http://www.weather.com.cn/textFC/gat.shtml"
    ]
    for url in urls:
        parse_page(url)

    # 2. Analyse the data.
    # 2.1 Sort by lowest temperature, using a lambda as the sort key.
    ALL_DATA.sort(key=lambda list_data: list_data["Lowest_temperature"])
    # Debugging tip: print(json.dumps(ALL_DATA, ensure_ascii=False)) shows
    # the Chinese city names readably instead of escaped.

    # 2.2 Visualise with pyecharts.Bar: keep only the ten coldest entries.
    data = ALL_DATA[0:10]
    # On Python 3 map() returns a lazy map object, so wrap it in list().
    cities = list(map(lambda c: c["city"], data))
    temperatures = list(map(lambda c: c["Lowest_temperature"], data))
    chart = Bar("中国最低气温表")
    # Cities go on the x axis, temperatures on the y axis.
    chart.add("", cities, temperatures)
    # Write the chart out as a standalone HTML file.
    chart.render("china_temperature.html")
    # print() as a function call runs on both Python 2 and Python 3; the
    # former `print "..."` statement is a SyntaxError on Python 3.
    print("Successful")
# Run the scraper only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
# ALL_DATA = [
# {"city": "台北", "Lowest_temperature": "25"},
# {"city": "高雄", "Lowest_temperature": "27"},
# {"city": "台中", "Lowest_temperature": "25"}
# ]
# # 使用python中的匿名函数,lambda表达式
# ALL_DATA.sort(key=lambda list_data: list_data["Lowest_temperature"])
# # 使用json.dumps()方法解决print打印编码问题
# print json.dumps(ALL_DATA, encoding="utf-8", ensure_ascii=False)