前期准备
准备一张白底图片作为词云底板
说明:本代码爬取的是豆瓣"正在上映"页面(上海)当前列出的电影,因此在不同日期运行此段代码可能会得到不同的结果。
实现效果
代码实现
import warnings
# Suppress every warning category for the rest of the run.
warnings.filterwarnings("ignore")
import jieba
import numpy
import codecs
import re
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from urllib import request
import requests
from bs4 import BeautifulSoup as bs
from wordcloud import WordCloud,ImageColorGenerator
import matplotlib
# Default size (width, height) in inches for figures created after this point.
matplotlib.rcParams['figure.figsize'] = (10.0, 5.0)
# Fetch the Douban "now playing" page and parse its movie list
def getNowPlayingMovie_list():
url = 'https://movie.douban.com/nowplaying/shanghai/'
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36'}
resp = requests.get(url=url, headers=headers)
# resp = request.urlopen('https://movie.douban.com/nowplaying/shanghai/')
# html_data = resp.read().decode('utf-8')
soup = bs(resp.text, 'html.parser')
nowplaying_movie = soup.find_all('div', id='nowplaying')
nowplaying_movie_list = nowplaying_movie[0].find_all('li', class_='list-item')
nowplaying_list = []
for item in no