大学排名爬取
技术路线
编辑代码思路
发现和解决问题
参考博客
关于None问题的博客文章
https://blog.csdn.net/qq_43544492/article/details/84869155
代码实现
import requests
from bs4 import BeautifulSoup
import bs4
def getHTMLText(url, timeout=30):
    """Fetch ``url`` over HTTP and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The decoded page text, or an empty string if the request failed
        (connection error, timeout, invalid URL, or a non-2xx status).
    """
    try:
        r = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into an exception so they take the
        # same failure path as network errors.
        r.raise_for_status()
        # Decode with the encoding guessed from the body rather than the
        # one declared in the HTTP headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-related failures are treated as "no page"; other
        # exceptions (KeyboardInterrupt, programming errors) propagate.
        return ""
def fillUnivList(ulist, html):
    """Parse the ranking table in ``html`` and append its rows to ``ulist``.

    For every ``<tr>`` inside the first ``<tbody>``, one entry
    ``[rank, name, score]`` is appended, taken from the first child of
    cell 0, the link text of cell 1, and the first child of cell 4.

    Args:
        ulist: List that is extended in place.
        html: Page source; an empty value leaves ``ulist`` untouched.

    Returns:
        None. Results are delivered through ``ulist``.
    """
    if not html:
        # getHTMLText reports a failed download as "", so there is
        # nothing to parse.
        return

    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        # find() returns None when the page has no table body.
        return

    for tr in tbody.children:
        # .children also yields whitespace text nodes between rows.
        if not isinstance(tr, bs4.element.Tag):
            continue

        tds = tr('td')  # calling a tag is shorthand for find_all
        if len(tds) < 5:
            continue

        rank_cell, name_cell, score_cell = tds[0], tds[1], tds[4]
        if not rank_cell.contents or not score_cell.contents:
            continue

        link = name_cell.a
        name = link.string if link is not None else None
        if name is None:
            # .string is None when the link is missing or wraps more than
            # one child; fall back to the cell's text so that a None never
            # reaches the formatter.
            name = name_cell.get_text(strip=True)

        ulist.append([rank_cell.contents[0], name, score_cell.contents[0]])
def printUnivList(ulist, num):
    """Print a header line followed by at most ``num`` rows of ``ulist``.

    Args:
        ulist: Sequence of ``[rank, name, score]`` entries; rank and score
            must be strings (spaces and newlines are stripped from them).
        num: Maximum number of rows to print. If ``ulist`` is shorter,
            only the available rows are printed.
    """
    # chr(12288) is the full-width (CJK) space. Padding the name column
    # with it keeps Chinese text aligned, and the header must use the same
    # fill as the data rows or the columns drift apart.
    cjk_space = chr(12288)
    row_fmt = "{0:^10}\t{1:{3}^10}\t{2:^10}"

    print(row_fmt.format("排名", "学校", "分数", cjk_space))

    # Slice instead of indexing range(num), so a short list cannot raise
    # IndexError; max() keeps a negative num meaning "no rows".
    for u in ulist[:max(num, 0)]:
        rank = u[0].replace(' ', '').replace("\n", "")
        score = u[2].replace(' ', '').replace("\n", "")
        print(row_fmt.format(rank, u[1], score, cjk_space))
def main(url='https://www.shanghairanking.cn/rankings/bcur/2020', num=20):
    """Download the ranking page, parse it, and print the top entries.

    Args:
        url: Page to scrape; defaults to the 2020 ranking page.
        num: Maximum number of rows to print.
    """
    uinfo = []
    html = getHTMLText(url)
    if not html:
        # getHTMLText returns "" on any fetch failure; stop here rather
        # than hand an empty document to the parser.
        print("页面获取失败: {}".format(url))
        return

    fillUnivList(uinfo, html)
    # Never ask for more rows than were actually parsed.
    printUnivList(uinfo, min(num, len(uinfo)))
# Run the scraper only when executed as a script, so importing this module
# does not trigger a network request.
if __name__ == "__main__":
    main()