1.目标
本文使用 Python 对电力领域的相关内容进行了简单分析,并生成了电力行业高频关键词的词云图。
2.完整代码
import urllib.request, urllib.error
from bs4 import BeautifulSoup
import re
import jieba
from matplotlib import pyplot as plt
from wordcloud import WordCloud
from PIL import Image
import numpy as np
# Start page that main() passes to gethtml() as the first request.
url = 'http://news.bjx.com.cn/live/2020lhdlzt/'
def main():
    """Build a word cloud from the pages linked on the start page.

    Fetches the module-level ``url`` with ``gethtml``, extracts the entries
    with ``getcont``, downloads the page behind each entry's ``'lianjie'``
    key with ``getpage_url``, concatenates the returned text in entry order,
    and passes the combined text to ``word_cloud``.
    """
    html = gethtml(url)
    cont = getcont(html)
    # Iterate the entries directly and join once; the manual counter the loop
    # used to carry was overwritten by the ``for`` statement on every pass.
    text = ''.join(getpage_url(item['lianjie']) for item in cont)
    word_cloud(text)
def gethtml(url):
header = {
'User-Agent'