Python 爬虫做表格：Python 爬虫与数据图表的实现

import matplotlib.pyplot as plt
import numpy as np
import requests
from bs4 import BeautifulSoup

# Module-level accumulator: one list of cell values per scraped table row.
allUniv = []


def getHTMLText(url):
    """Fetch *url* and return the response body as text.

    The request uses a 30-second timeout and the body is decoded as UTF-8.

    Returns:
        The page text, or an empty string when the request fails
        (connection error, timeout, or an HTTP error status).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Only request/HTTP failures are turned into the "" sentinel; any
        # other exception indicates a programming error and should surface.
        return ""
    r.encoding = "utf-8"
    return r.text

# Column layout shared by the header line and every printed row.
_ROW_FORMAT = "{0:^4} {1:^20} {2:^5} {3:^8} {4:^8} {5:^8} {6:^8}"

# The highest column index read by the functions below is 9.
_MIN_COLUMNS = 10


def fillUnivList(soup):
    """Append one entry per table row found in *soup* to ``allUniv``.

    Each entry is the list of ``td.string`` values of one ``<tr>``. Rows
    that contain no ``<td>`` at all are skipped (presumably header rows).

    Returns:
        The total number of rows stored in ``allUniv`` after the call.
    """
    for tr in soup.find_all("tr"):
        cells = tr.find_all("td")
        if not cells:
            continue
        allUniv.append([td.string for td in cells])
    return len(allUniv)


def _complete_rows(num):
    """Return those of the first *num* stored rows that have every column used here."""
    return [u for u in allUniv[:num] if len(u) >= _MIN_COLUMNS]


def _score_values(u):
    """Return the four charted values of row *u* (columns 3, 4, 5, 9) as floats."""
    return [
        float(u[3]),
        float(u[4]),
        # A "%" in this column is stripped before conversion.
        float(str(u[5]).replace("%", "")),
        float(u[9]),
    ]


def printUnivList(num):
    """Print a header line followed by every row whose province column is 江西.

    Only the first *num* rows of ``allUniv`` are considered; rows with
    fewer than ten columns are ignored instead of raising ``IndexError``.
    """
    print(_ROW_FORMAT.format("排名", "学校名称", "省市", "总分", "生源质量", "培养结果", "顶尖成果"))
    for u in _complete_rows(num):
        if u[2] == "江西":
            # str() on every field: a cell value of None would otherwise be
            # rejected by the "^" alignment format spec.
            print(_ROW_FORMAT.format(*(str(u[i]) for i in (0, 1, 2, 3, 4, 5, 9))))


def drawBarChart(num):
    """Show a grouped bar chart comparing three universities on four scores.

    The universities are looked up by name (column 1) among the first
    *num* rows of ``allUniv``. A university that is not present is left
    out of the chart; if none is present nothing is drawn.

    Raises:
        TypeError, ValueError: if a score cell of a matched row cannot be
            converted to ``float``.
    """
    series = [("江西理工大学", "r"), ("南昌大学", "y"), ("华东交通大学", "b")]
    wanted = {label for label, _ in series}

    scores = {}
    for u in _complete_rows(num):
        # Keep the first occurrence so each series has exactly four values.
        if u[1] in wanted and u[1] not in scores:
            scores[u[1]] = _score_values(u)

    if not scores:
        print("drawBarChart: none of the requested universities were found")
        return

    name_list = ["总分", "生源质量", "培养结果", "顶尖成果"]
    positions = list(range(len(name_list)))
    total_width, n = 0.8, 4
    width = total_width / n

    # Set the font before creating the figure, as drawBar does.
    plt.rcParams["font.sans-serif"] = "SimHei"
    fig, ax = plt.subplots()

    for slot, (label, color) in enumerate(series):
        values = scores.get(label)
        if values is None:
            # An empty series would make plt.bar fail on a length mismatch.
            continue
        # Each series is shifted right by one bar width per slot.
        x = [pos + slot * width for pos in positions]
        plt.bar(x, values, width=width, label=label, fc=color)

    # Category labels sit under the first slot of every group.
    ax.set_xticks(positions)
    ax.set_xticklabels(name_list)

    plt.legend()
    plt.show()


def drawBar(num):
    """Show a pie chart of the column-9 value for every 江西 row.

    Only the first *num* rows of ``allUniv`` are considered. The slice of
    江西理工大学 is pulled out of the pie (explode 0.5). Nothing is drawn
    when no 江西 row is present.

    Raises:
        TypeError, ValueError: if a column-9 cell of a matched row cannot
            be converted to ``float``.
    """
    djcg = []
    name = []
    explode = []
    for u in _complete_rows(num):
        if u[2] != "江西":
            continue
        # Convert explicitly instead of handing strings to matplotlib.
        djcg.append(float(u[9]))
        name.append(u[1])
        explode.append(0.5 if u[1] == "江西理工大学" else 0)

    if not djcg:
        print("drawBar: no rows found for 江西")
        return

    plt.rcParams["font.sans-serif"] = "SimHei"
    fig1, ax1 = plt.subplots()
    ax1.pie(djcg, explode=explode, labels=name, autopct="%1.1f%%",
            shadow=True, startangle=90)
    ax1.axis("equal")
    plt.legend()
    plt.show()


def main():
    """Download the ranking page, print the 江西 rows and show both charts."""
    url = "http://www.zuihaodaxue.com/zuihaodaxuepaiming2018.html"
    html = getHTMLText(url)
    if not html:
        # getHTMLText returns "" on failure; there is nothing to parse or plot.
        print("Failed to download " + url)
        return

    soup = BeautifulSoup(html, "html.parser")
    num = fillUnivList(soup)
    if num == 0:
        print("No table rows found in " + url)
        return

    printUnivList(num)
    drawBarChart(num)
    drawBar(num)


if __name__ == "__main__":
    main()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额 3.43，前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值