# Python作业8 (Python assignment 8)
# 实现一个动态排行榜和词云 (build an animated ranking chart and a word cloud)
import requests
from bs4 import BeautifulSoup
import json
import re
import time
import jieba
from _collections import OrderedDict
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import numpy
import wordcloud
# HTTP request headers passed to requests.get() when fetching the TIOBE page;
# they set a desktop-browser User-Agent string (presumably so the site does not
# reject the default client identifier -- confirm).
USER_AGENT={'user-agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1'}
# Display names used by the rest of the script for a few languages.
# NOTE(review): 'Assembly language' is relabelled 'R'. This is kept because the
# script's hard-coded language list contains 'R', but it looks like a
# mislabel -- confirm the intent.
_SHORT_NAMES = {
    'Assembly language': 'R',
    'Visual Basic': 'VB',
    'JavaScript': 'JS',
}


def _parse_series(page_text):
    """Extract the per-language data points embedded in the page source.

    Args:
        page_text: HTML/JavaScript source of the TIOBE index page.

    Returns:
        A list of dicts, one per data point, with the keys:
        'name'  - language name (shortened via ``_SHORT_NAMES``),
        'data'  - the first three comma-separated fields of the point
                  (the ``Date.UTC`` arguments) as strings, unpadded,
        'value' - the last comma-separated field of the point, as a string.
        The list is empty when no ``series:`` section is found.
    """
    # The chart data is the text between "series:" and the closing "});".
    series_blob = ''.join(re.findall(r'series:(.*?)\}\);', page_text, re.DOTALL))

    datas = []
    # Each "{...}" object inside the series describes one language.
    for content in re.findall(r'({.*?})', series_blob, re.DOTALL):
        name = ''.join(re.findall(r"{name : '(.*?)'", content, re.DOTALL))
        name = _SHORT_NAMES.get(name, name)

        # Every point looks like "[Date.UTC(a, b, c), value]".
        for point in re.findall(r"\[Date.UTC(.*?)\]", content, re.DOTALL):
            # Strip blanks and parentheses so the point is "a,b,c,value".
            point = re.sub(r'[()]', '', point.replace(' ', ''))
            fields = point.split(',')
            # NOTE(review): JavaScript Date.UTC months are zero-based, so the
            # second field is probably one less than the calendar month. It is
            # left as-is because the script's date keys rely on the raw
            # numbers -- confirm.
            datas.append({
                'name': name,
                'data': fields[:3],
                'value': fields[-1],
            })
    return datas


def GetData():
    """Download the TIOBE index page and return its chart data points.

    Returns:
        A list of ``{'name': str, 'data': [str, str, str], 'value': str}``
        dicts as produced by ``_parse_series``.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    print("正在爬取 https://www.tiobe.com/tiobe-index/")
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(
        'https://www.tiobe.com/tiobe-index/',
        headers=USER_AGENT,
        timeout=30,
    )
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it were data.
    response.raise_for_status()
    datas = _parse_series(response.text)
    print("爬取成功,数据处理成功")
    return datas
def sort_key(old_dict, reverse=False):
    """Return a copy of *old_dict* as an OrderedDict sorted by key.

    Args:
        old_dict: mapping whose keys are mutually comparable.
        reverse: when true, sort the keys in descending order.

    Returns:
        An ``OrderedDict`` holding the same key/value pairs as *old_dict*,
        inserted in sorted key order.
    """
    # Map each key to its own value (not to the whole input mapping).
    return OrderedDict(
        (key, old_dict[key]) for key in sorted(old_dict, reverse=reverse)
    )
# Date key the word cloud was originally drawn for.
_CLOUD_MONTH = '2021.4.2'


def _build_month_table(datas, names):
    """Build ``{date_key: {language: value}}`` from the scraped records.

    Args:
        datas: records with 'name', 'data' (list of three date fields as
            strings) and 'value' keys.
        names: languages to include; each starts at 0 for every date.

    Returns:
        A dict keyed by "a.b.c" date strings, in the order the 'Java' records
        appear in *datas*. Only dates that have a 'Java' record are present.
    """
    def date_key(record):
        return '.'.join(record['data'])

    table = {
        date_key(record): {name: 0 for name in names}
        for record in datas
        if record['name'] == 'Java'
    }
    # Single pass over the records instead of scanning them per month/language.
    for record in datas:
        row = table.get(date_key(record))
        if row is not None and record['name'] in row:
            row[record['name']] = record['value']
    return table


def _animate_ranking(table, color_dict):
    """Redraw a horizontal bar ranking once per date, then show the figure."""
    plt.gca()
    for month, ratings in table.items():
        plt.cla()
        # Ascending order puts the highest value on the top bar.
        ranked = sorted(ratings.items(), key=lambda item: float(item[1]))
        labels = [name for name, _ in ranked]
        values = [float(value) for _, value in ranked]
        positions = range(1, len(ranked) + 1)

        plt.barh(positions, values, color=[color_dict[name] for name in labels])
        plt.title(month, fontsize=24)
        plt.yticks(positions, labels, fontproperties='simhei', fontsize=16)
        # range(0, 30, 100) produced the single tick 0; use a real step.
        plt.xticks(range(0, 30, 5))
        # Print each value just right of its bar.
        for position, value in zip(positions, values):
            plt.text(value + 0.1, position - 0.1, str(value))
        plt.pause(0.001)
    plt.show()


def _show_word_cloud(ratings):
    """Show a word cloud whose word sizes follow the given ratings.

    The weights are passed directly as frequencies rather than as repeated
    words in a text, so names such as 'C++' and 'C#' are not split or
    dropped by the text tokenizer.
    """
    # Zero weights are left out so the library never normalizes by zero.
    frequencies = {
        name: float(value)
        for name, value in ratings.items()
        if float(value) > 0
    }
    plt.figure()
    cloud = wordcloud.WordCloud()
    cloud.generate_from_frequencies(frequencies)
    plt.imshow(cloud)
    plt.show()


def main():
    """Scrape the data, play the ranking animation, then show the word cloud."""
    names = ['Java', 'C++', 'SQL', 'PHP', 'R', 'JS', 'VB', 'C', 'Python', 'C#']
    color_dict = {
        'C#': 'plum',
        'Python': 'darkorange',
        'C++': 'lightgreen',
        'C': 'yellow',
        'Java': 'salmon',
        'VB': 'aquamarine',
        'JS': 'dodgerblue',
        'R': 'pink',
        'PHP': 'b',
        'SQL': 'darkviolet',
    }

    table = _build_month_table(GetData(), names)
    if not table:
        raise SystemExit("no 'Java' data points were scraped; nothing to plot")

    _animate_ranking(table, color_dict)

    # Keep the original date when it exists; otherwise fall back to the last
    # date encountered instead of failing with KeyError.
    cloud_month = _CLOUD_MONTH if _CLOUD_MONTH in table else list(table)[-1]
    _show_word_cloud(table[cloud_month])


if __name__ == "__main__":
    main()