爬虫入门四(续三)
文末附教程博客链接,感兴趣可以去看一下。
分享一个pyecharts学习网址:
Python:数据可视化pyecharts的使用.
用pyecharts展示爬取到的数据
绘制想看的人数(热度)排行榜:
import requests
from bs4 import BeautifulSoup
from pyecharts import Page, Pie, Bar
# Page to scrape; the movie entries are read from the <div id="showing-soon">
# container of this page.
url = "https://movie.douban.com/cinema/later/chengdu/"

# A timeout keeps the script from hanging forever on an unresponsive server
# (requests waits indefinitely when no timeout is given).
# NOTE(review): some sites reject the default requests User-Agent; if this
# call fails with an HTTP error, try passing a browser-like `headers=` - confirm.
response = requests.get(url, timeout=10)
# Fail fast on an HTTP error status instead of parsing an error page.
response.raise_for_status()

soup = BeautifulSoup(response.content.decode('utf-8'), 'lxml')
all_movies = soup.find('div', id="showing-soon")
if all_movies is None:
    # soup.find returns None when nothing matches; report that clearly rather
    # than failing later with an AttributeError on `None.find_all`.
    raise ValueError(
        'could not find <div id="showing-soon"> in the response from ' + url)

# List that collects one dict of fields per movie.
all_movies_info = []
for each_movie in all_movies.find_all('div', class_="item"):
    all_a = each_movie.find_all('a')
    all_li = each_movie.find_all('li')

    # The second <a> of each item supplies both the title text and the link.
    movie_name = all_a[1].text
    movie_href = all_a[1]['href']  # extracted, but not stored in the dict below

    # The first four <li> elements are read positionally as date, type, area
    # and the "want to see" text - presumably this matches the page layout;
    # verify if the site markup changes.
    movie_date = all_li[0].text
    movie_type = all_li[1].text
    movie_area = all_li[2].text
    movie_lovers = all_li[3].text

    all_movies_info.append({
        'name': movie_name,
        'date': movie_date,
        'type': movie_type,
        'area': movie_area,
        # Strip the '人想看' suffix so only the remaining text (presumably the
        # numeric count) is kept.
        'lovers': movie_lovers.replace('人想看', ''),
    })
#print(all_movies_info)
#按‘想看的人数’排序
#sorted函数:第一个参数是一个可遍历(可迭代)的对象;key参数接收一个函数(此处为匿名函数),用来指定以每个元素中的哪个值作为排序依据
sort_by_lovers = sorted(all_movies_info, key = lambda x: int(x[