# -*- coding: utf-8 -*-
# @Time : 2020/7/9 20:58
# @Author : python_HongHu
# @Email : 464646939@qq.com
# @File : work_data_wcd.py
# @Software: PyCharm
#思路:获取腾讯热点新闻标题作为词云文字素材-->保存为csv文件-->读取csv文件内文字制作词云图
import jieba
import requests
import json
import jsonpath
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS
from imageio import imread
#请求词云数据
def get_wcd(url, timeout=10):
    """Fetch the hot-news feed that supplies the word-cloud text.

    Args:
        url: Address of the news-list API endpoint.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        The ``requests.Response`` on success, or ``None`` when the request
        fails (connection error, timeout, or HTTP error status).
    """
    # Browser-like User-Agent; change it if the endpoint rejects the request.
    headers_value = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/83.0.4103.61 Safari/537.36"
    }
    try:
        resp = requests.get(url, headers=headers_value, timeout=timeout)
        # Treat 4xx/5xx answers as failures instead of parsing an error page.
        resp.raise_for_status()
    except requests.exceptions.RequestException as e:
        # RequestException covers ConnectionError, Timeout and HTTPError.
        print("请求错误", e)
        return None
    # Only reached when the request actually succeeded.
    print("热点请求成功")
    return resp
#获取词云数据
def parse_data(resp):
    """Extract every news title from the API response.

    Args:
        resp: Object exposing the JSON payload as ``resp.text`` (the value
            returned by ``get_wcd``), or ``None`` when the request failed.

    Returns:
        A list with every ``title`` value found anywhere in the payload.
        The list is empty when there is no response, the body is not valid
        JSON, or the payload contains no ``title`` key.
    """
    if resp is None:
        print("词云热点数据爬取失败: 无响应")
        return []
    try:
        data = json.loads(resp.text)
    except ValueError as e:
        # json.JSONDecodeError is a subclass of ValueError.
        print("词云热点数据解析失败", e)
        return []
    print(data)
    # jsonpath.jsonpath returns False (not an empty list) when nothing
    # matches, so normalise that to a real list for the callers.
    title = jsonpath.jsonpath(data, "$..title") or []
    print(title)
    if title:
        print("词云热点数据爬取成功!")
    return title
#数据保存到CSV
def save_csv(title, path="csv/热点.csv"):
    """Write the news titles to a single-column CSV file.

    Args:
        title: Iterable of headline strings. A falsy value (``None``,
            ``False``, empty list) produces a file with only the header.
        path: Destination file. Missing parent directories are created.

    The file is written as UTF-8 with a BOM (``utf_8_sig``) under the
    column name ``热点`` and without the DataFrame index.
    """
    from pathlib import Path

    target = Path(path)
    # to_csv does not create directories; make sure the folder exists.
    target.parent.mkdir(parents=True, exist_ok=True)

    titles = list(title) if title else []
    result = pd.DataFrame({'热点': titles})
    result.to_csv(target, encoding='utf_8_sig', index=False)
#读取csv文件
def read_wcd(path='csv/热点.csv'):
    """Load the saved headlines back from the CSV file.

    Args:
        path: CSV file to read; the default matches the default location
            used by ``save_csv``.

    Returns:
        A list of headline strings taken from the ``热点`` column, with
        missing cells removed.
    """
    # dtype=str keeps digit-only headlines (e.g. "2020") as text instead of
    # letting pandas turn them into numbers, which cannot be segmented.
    df = pd.read_csv(path, dtype=str, encoding='utf_8_sig')
    # Blank cells are read as NaN; drop them so only real text remains.
    wordcloud = df['热点'].dropna().tolist()
    return wordcloud
#制作词云
def make_wcd(wordcloud, pic_path='imgdata/1.jpg',
             font_path='/home/shen/Downloads/font/msyh.ttc'):
    """Segment the headlines with jieba and display them as a word cloud.

    Args:
        wordcloud: Iterable of headline strings. Entries that are not
            strings (e.g. NaN read from a CSV) or are blank are skipped.
        pic_path: Image file used as the mask of the cloud.
        font_path: Font file passed to ``WordCloud``.

    Raises:
        ValueError: If no usable text remains after filtering, so the
            failure is reported before the mask image is loaded.
    """
    # Put spaces between the words jieba finds in each usable headline.
    word_list = [
        " ".join(jieba.cut(sentence))
        for sentence in wordcloud
        if isinstance(sentence, str) and sentence.strip()
    ]
    new_text = ' '.join(word_list)
    if not new_text.strip():
        raise ValueError("没有可用于生成词云的文本")

    img_mask = imread(pic_path)
    # Named `cloud` so the `wordcloud` argument is not shadowed.
    cloud = WordCloud(
        background_color="white",
        font_path=font_path,
        mask=img_mask,
        stopwords=STOPWORDS,
    ).generate(new_text)

    plt.imshow(cloud)
    plt.axis("off")
    plt.show()
if __name__ == '__main__':
    # Pipeline: fetch the news feed -> extract titles -> save them to CSV
    # -> read the CSV back -> render the word cloud.
    # Tencent news endpoint used as the text source.
    url = 'https://api.dreamreader.qq.com/news/v1/province/news/list?province_code=hb&page_size=10'
    resp = get_wcd(url)
    if resp is None:
        # get_wcd returns None when the request failed; stop here instead
        # of crashing later with an unrelated traceback.
        raise SystemExit("热点请求失败,无法生成词云")
    title = parse_data(resp)
    if not title:
        # No titles means there is no text to build the cloud from.
        raise SystemExit("未获取到热点标题,无法生成词云")
    save_csv(title)
    wordcloud = read_wcd()
    make_wcd(wordcloud)
'''
![csv文件截图.png](https://upload-images.jianshu.io/upload_images/19102738-3852dac3c20c6133.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
![词云图.png](https://upload-images.jianshu.io/upload_images/19102738-146a622c73326593.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
![运行截图.png](https://upload-images.jianshu.io/upload_images/19102738-2292c0af2f7e2741.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
'''