# -*- coding: utf-8 -*-
# coding = utf-8
# @Time : 2021/1/28 6:57
# @Author : Chunjun.Yin
# @QQ : 408753851
# @FileName : HaoDaXue.py
# @Software : PyCharm
# url= https://www.shanghairanking.cn/rankings/bcur/2020
import bs4
import requests
import re
from bs4 import BeautifulSoup
def getHTMLText(url):
    """Download *url* and return the response body as text.

    Returns an empty string when the request fails (connection problem,
    timeout, invalid URL, or an HTTP error status), so callers can treat
    "" as "nothing fetched".
    """
    try:
        r = requests.get(url, timeout=30)  # 30-second timeout for the request
        r.raise_for_status()  # raise on 4xx/5xx responses
        # Decode using the encoding detected from the body rather than the
        # one guessed from the HTTP headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-related failures are turned into "". Unrelated errors
        # (and Ctrl-C) are allowed to propagate instead of being hidden.
        return ""
def fillUnivList(uList, html):
    """Parse *html* and append one row per institution link to *uList*.

    For every <a> whose href matches '/institution/', the text of the link
    and the text of the four element siblings that follow the link's parent
    are appended to uList as a five-item list, with newlines removed from
    the sibling texts. uList is modified in place; nothing is returned.

    Rows whose parent has fewer than four following element siblings are
    skipped.
    """
    soup = BeautifulSoup(html, "html.parser")
    for a_href in soup.find_all('a', href=re.compile('/institution/')):
        # Keep only real tags: next_siblings also yields the whitespace text
        # nodes between elements, which would otherwise shift the columns.
        cells = [
            sibling
            for sibling in a_href.parent.next_siblings
            if isinstance(sibling, bs4.element.Tag)
        ]
        if len(cells) < 4:
            continue
        # get_text() is used instead of .string because .string is None for
        # elements that contain more than one child node.
        row = [a_href.get_text()]
        row.extend(cell.get_text().replace("\n", "") for cell in cells[:4])
        uList.append(row)
def printUnivList(uList, num):
    """Print a header line followed by up to *num* rows from *uList*.

    Each row of uList is read at indexes 0-4 and printed after its 1-based
    position, with the columns centred and separated by tabs. When uList
    holds fewer than num rows, only the available rows are printed; a
    num of zero or less prints the header only.
    """
    print("{:^10}\t{:^5}\t{:^30}\t{:^14}\t{:^30}\t{:^14}".format("排名","学校","城市","类型","总分","层次"))
    row_format = "{:^10}\t{:^10}\t{:^10}\t{:^10}\t{:^10}\t{:^10}"
    # max() stops a negative num from becoming a "drop from the end" slice.
    for rank, u in enumerate(uList[:max(num, 0)], start=1):
        print(row_format.format(rank, u[0], u[1], u[2], u[3], u[4]))
def main():
    """Fetch the 2020 ranking page, parse it and print the first rows.

    At most 20 rows are printed; fewer when the page yielded fewer rows
    (for example when the download failed and nothing could be parsed).
    """
    uinfo = []
    url = "https://www.shanghairanking.cn/rankings/bcur/2020"
    html = getHTMLText(url)  # "" when the download fails
    fillUnivList(uinfo, html)  # fills uinfo in place
    # Never request more rows than were actually parsed, so a short or
    # empty result prints what is available instead of indexing past the end.
    printUnivList(uinfo, min(20, len(uinfo)))
main()  # entry point; NOTE(review): runs on import too, since there is no __name__ == "__main__" guard
# python-中国大学排名情况代码
# 最新推荐文章于 2022-04-27 21:54:47 发布
# 关键词由CSDN通过智能技术生成