import csv
import json

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
# Scrape the company ranking table at SOURCE_URL, store it as CSV, and draw a
# bar chart of turnover and profit for the first TOP_N companies.
#
# NOTE: the original `from locale import *` was dropped on purpose: it rebinds
# the builtin `str` (locale exports its own `str`), and the only names used
# from it (setlocale/atof) relied on the Windows-only locale "English_US".
SOURCE_URL = "http://www.fortunechina.com/fortune500/c/2019-07/10/content_337536.htm"
CSV_FILE = 'form.csv'
TOP_N = 10            # number of leading rows shown in the chart
NAME_COLUMN = 2       # index of the table cell that holds the company name
REQUEST_TIMEOUT = 30  # seconds; requests waits forever without a timeout
BAR_WIDTH = 0.15


def parse_amount(text: str) -> float:
    """Convert a number written with thousands separators to a float.

    Args:
        text: A numeric string such as ``"414,649.9"`` or ``"-1,234"``.

    Returns:
        The parsed value as a float.

    Raises:
        ValueError: If *text* is ``None`` (empty table cell) or is not a
            number once the commas are removed.
    """
    if text is None:
        raise ValueError("missing amount (empty table cell)")
    # Strip the thousands separators by hand instead of switching the
    # process-wide LC_NUMERIC locale, which is platform dependent.
    return float(str(text).replace(",", ""))


def company_record(cells, link_texts):
    """Build one ``[name, turnover, profit, ...]`` record from a table row.

    Args:
        cells: Text of every ``<td>`` in the row, in document order.
        link_texts: Text of every ``<a>`` in the row, in document order.

    Returns:
        The row from the name column onwards (the leading columns are
        discarded), or ``None`` when the row is too short to contain a
        name column.
    """
    if len(cells) <= NAME_COLUMN:
        return None
    row = list(cells)
    if link_texts:
        # The company name is wrapped in a link; when a row has several
        # links the last one wins, as in the original loop.
        row[NAME_COLUMN] = link_texts[-1]
    return row[NAME_COLUMN:]


def chart_series(records, top_n: int = TOP_N):
    """Split the first *top_n* records into the three series to plot.

    Args:
        records: Records as produced by :func:`company_record`.
        top_n: How many leading records to use.

    Returns:
        A ``(names, turnover, profit)`` tuple of equally long lists; the
        two numeric lists contain floats.

    Raises:
        ValueError: If a turnover or profit cell cannot be parsed.
        IndexError: If a record has fewer than three fields.
    """
    top = records[:top_n]
    names = [row[0] for row in top]
    turnover = [parse_amount(row[1]) for row in top]
    profit = [parse_amount(row[2]) for row in top]
    return names, turnover, profit


def fetch_html(url: str = SOURCE_URL, timeout: float = REQUEST_TIMEOUT) -> str:
    """Download *url* and return the decoded page text.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-success HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    if "charset" not in response.headers.get("Content-Type", "").lower():
        # Without a declared charset requests falls back to a default that
        # garbles Chinese text, so detect the encoding from the body.
        response.encoding = response.apparent_encoding
    return response.text


def extract_rows(html: str) -> list:
    """Parse the first ``<tbody>`` of *html* into a list of records.

    Raises:
        ValueError: If the page contains no ``<tbody>`` element.
    """
    soup = BeautifulSoup(html, 'html.parser')
    body = soup.tbody
    if body is None:
        raise ValueError("no <tbody> element found in the downloaded page")
    records = []
    for tr in body.find_all('tr'):
        cells = [td.string for td in tr.find_all('td')]
        link_texts = [a.string for a in tr.find_all('a')]
        record = company_record(cells, link_texts)
        if record is not None:
            records.append(record)
    return records


def save_csv(records, path: str = CSV_FILE) -> None:
    """Write all records to *path* as CSV.

    The BOM-prefixed UTF-8 encoding keeps the Chinese text from showing up
    garbled when the file is opened.
    """
    pd.DataFrame(records).to_csv(path, encoding='utf-8_sig')


def plot_top_companies(names, turnover, profit) -> None:
    """Show a grouped bar chart of turnover and profit per company."""
    # Use a font with CJK glyphs, and keep the minus sign renderable with it.
    plt.rcParams['font.sans-serif'] = ['Simhei']
    plt.rcParams['axes.unicode_minus'] = False

    positions = np.arange(len(names))
    plt.figure(figsize=(20, 10))
    plt.title('中国十大企业营业额和营业利润柱状图', fontsize=20)
    plt.bar(positions, turnover, width=BAR_WIDTH, color='c', align='center',
            label='营业额', tick_label=names, alpha=0.5)
    # Shift the second series by one bar width so the bars sit side by side.
    plt.bar(positions + BAR_WIDTH, profit, width=BAR_WIDTH, color='r',
            align='center', label='营业利润', alpha=0.5)
    plt.xticks(rotation=-10)
    plt.legend()
    plt.show()


def main() -> None:
    """Run the whole pipeline: download, parse, save to CSV, plot."""
    try:
        html = fetch_html()
    except requests.RequestException as exc:
        raise SystemExit(f"Could not download {SOURCE_URL}: {exc}") from exc
    records = extract_rows(html)
    save_csv(records)
    names, turnover, profit = chart_series(records)
    plot_top_companies(names, turnover, profit)


if __name__ == "__main__":
    main()