import requests
from bs4 import BeautifulSoup
import bs4
def getHT(url, timeout=1):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Defaults
            to 1, the value that used to be hard-coded.

    Returns:
        The decoded page text, or the sentinel string "异常" when the
        request fails or the server answers with an HTTP error status.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException:
        # Catch only request/HTTP failures; a bare ``except`` would also
        # hide KeyboardInterrupt and genuine programming errors.
        return "异常"
    # Decode with the encoding guessed from the content rather than the one
    # declared in the HTTP headers.
    response.encoding = response.apparent_encoding
    return response.text
def fillUL(ulist, html):
    """Append the first three cell texts of every table row in *html* to *ulist*.

    Only the first ``<table>`` in the document is examined. Each appended
    entry is a three-element list of stripped cell texts (printUL shows them
    as rank, name and score).

    Args:
        ulist: List that receives the parsed rows; it is mutated in place.
        html: Markup of the page to parse.

    Returns:
        None. *ulist* is left untouched when the document has no table.
    """
    soup = BeautifulSoup(html, "html.parser")
    table = soup.find("table")
    if table is None:
        # Nothing to parse, e.g. when the download failed and *html* is an
        # error marker rather than a real page.
        return
    # Search for <tr> at any depth so rows wrapped in <thead>/<tbody> are
    # still visited one by one.
    for row in table.find_all("tr"):
        cells = row.find_all("td")
        if len(cells) < 3:
            # Header rows (<th> only) and malformed rows carry no data.
            continue
        # get_text() always yields a str, whereas .string is None for cells
        # with mixed content.
        ulist.append([cell.get_text(strip=True) for cell in cells[:3]])
def printUL(ulist, num):
    """Print a header line followed by at most *num* rows of *ulist*.

    Args:
        ulist: Sequence of rows; the first three fields of each row are
            printed as centred, tab-separated columns.
        num: Maximum number of rows to print. Fewer are printed when *ulist*
            is shorter; zero or a negative value prints only the header.
    """
    row_format = "{:^10}\t{:^6}\t{:^10}"
    print(row_format.format("rank", "name", "score"))
    # Slice instead of indexing range(num) so a short list cannot raise
    # IndexError; clamp at 0 so a negative num does not slice from the end.
    for row in ulist[:max(num, 0)]:
        # None does not support the "^10" format spec, so show it as an
        # empty cell; everything else is rendered through str().
        rank, name, score = (
            "" if field is None else str(field) for field in row[:3]
        )
        print(row_format.format(rank, name, score))
def main():
    """Download the ranking page, parse its table and print the first 20 rows."""
    uinfo = []
    url = "https://www.eol.cn/e_html/gk/dxpm/index.shtml"
    html = getHT(url)
    if html == "异常":
        # getHT returns this sentinel when the download failed; there is no
        # page to parse, so report the failure instead of feeding the marker
        # to the HTML parser.
        print("failed to fetch", url)
        return
    fillUL(uinfo, html)
    printUL(uinfo, 20)
# Run the crawler only when this file is executed as a script, so importing
# the module does not trigger a network request.
if __name__ == "__main__":
    main()
# Targeted crawler for Chinese university rankings (中国大学排名定向爬虫)
# First published 2023-04-23 11:42:29