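# Before crawling:
# 1. Check whether the target page is static HTML (data present in the raw response).
# 2. Manually review the site's robots.txt.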
import requests
from bs4 import BeautifulSoup
import bs4
def getHTMLText(url):
    """Download *url* and return the response body as text.

    The request uses a 30 second timeout and treats any HTTP error status
    (via ``raise_for_status``) as a failure. The body is decoded with the
    encoding that requests detects from the content (``apparent_encoding``)
    rather than the one declared in the response headers.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or ``""`` if the request failed.
    """
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: network/HTTP failures yield an empty page.
        # Only request errors are caught so that KeyboardInterrupt and
        # genuine programming errors are no longer silently swallowed.
        return ""
    response.encoding = response.apparent_encoding
    return response.text
def fillUnivList(uList, html):
    """Parse table rows out of *html* and append them to *uList*.

    Every tag that is a direct child of the first ``<tbody>`` is treated as a
    row. For each row the text of its 1st, 2nd and 4th ``<td>`` cells is
    appended to *uList* as a three-item list (the output helpers in this file
    label these columns as rank, school name and total score).

    Args:
        uList: List that is extended in place with the extracted rows.
        html: HTML source of the page; may be empty.

    Returns:
        None. If the page has no ``<tbody>`` (for example an empty string
        after a failed download) *uList* is left unchanged.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find("tbody")
    if tbody is None:
        # Nothing to parse; avoid AttributeError on ``None.children``.
        return
    for tr in tbody.children:
        # ``children`` also yields bare text nodes (e.g. newlines between
        # rows); only real tags can contain cells.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr.find_all("td")  # same as the shorthand tr("td")
        if len(tds) < 4:
            # Skip malformed/short rows instead of raising IndexError.
            continue
        uList.append([tds[0].string, tds[1].string, tds[3].string])
def printUnivList(uList, num):
    """Print a header line followed by at most *num* rows of *uList*.

    Columns are centred in fixed-width fields (10, 15 and 10 characters)
    and separated by tab characters.

    Args:
        uList: Sequence of rows; items 0, 1 and 2 of each row are printed.
        num: Maximum number of rows to print. If *uList* holds fewer rows,
            only the available ones are printed; values <= 0 print the
            header only.
    """
    row_format = "{:^10}\t{:^15}\t{:^10}"
    print(row_format.format("排名", "学校名称", "总分"))
    # Slice instead of indexing range(num) so a short list cannot raise
    # IndexError; max() keeps a negative num from slicing from the end.
    for row in uList[:max(num, 0)]:
        print(row_format.format(row[0], row[1], row[2]))
def writeInFile(filename, uList, num):
    """Append a header line and at most *num* rows of *uList* to a file.

    The file is opened in append mode, so repeated calls add further tables
    to the end of the same file. Each line holds three columns centred in
    25-character fields and separated by tab characters.

    Args:
        filename: Path of the text file to append to (created if missing).
        uList: Sequence of rows; items 0, 1 and 2 of each row are written.
        num: Maximum number of rows to write. If *uList* holds fewer rows,
            only the available ones are written; values <= 0 write the
            header only.
    """
    row_format = "{:^25}\t{:^25}\t{:^25}\n"
    # ``with`` guarantees the handle is flushed and closed, and UTF-8 is set
    # explicitly because the header (and typically the data) is Chinese text
    # that the platform default encoding may not be able to represent.
    with open(filename, "a", encoding="utf-8") as out:
        out.write(row_format.format("排名", "学校名称", "总分"))
        # Slice instead of indexing range(num) so a short list cannot raise
        # IndexError; max() keeps a negative num from slicing from the end.
        for row in uList[:max(num, 0)]:
            out.write(row_format.format(row[0], row[1], row[2]))
def main():
    """Fetch the 2019 ranking page and append its first 20 rows to 1.txt."""
    rankings = []
    page = getHTMLText("http://www.zuihaodaxue.com/zuihaodaxuepaiming2019.html")
    fillUnivList(rankings, page)
    # Output goes to a file; printUnivList can be called here instead to
    # show the table on stdout.
    writeInFile("1.txt", rankings, 20)
# Run the crawler only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
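# Title of the source article: "China university ranking crawler example" (中国大学排名爬虫例)
# Blog-page residue: "latest recommended article published 2023-02-25 00:04:19"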