"""Scrape a university ranking table from cuaa.net and print its first rows."""

import bs4
import requests
from bs4 import BeautifulSoup

# Page that holds the ranking table.
RANKING_URL = "http://www.cuaa.net/paihang/news/news.jsp?information_id=137445"

# Number of leading characters of the page dropped before parsing.
# NOTE(review): presumably skips markup that precedes the ranking table (for
# example an earlier <tbody>); the value is tied to the page layout - confirm.
HTML_SKIP_CHARS = 6600

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30


def gethtml() -> str:
    """Download the ranking page and return its text minus the leading part.

    Returns:
        The decoded page text with the first ``HTML_SKIP_CHARS`` characters
        removed, or an empty string when the request fails (a failure
        message is printed in that case).
    """
    try:
        r = requests.get(RANKING_URL, timeout=REQUEST_TIMEOUT)
        r.raise_for_status()
    except requests.RequestException:
        # Return early: there is no usable response to read text from.
        print("爬取失败")
        return ""
    # Use the encoding detected from the body rather than the header default.
    r.encoding = r.apparent_encoding
    return r.text[HTML_SKIP_CHARS:]


def get_usefuldata(ulist: list, demo: str) -> list:
    """Append one list of cell texts per table row found in *demo*.

    Args:
        ulist: List that receives the rows; it is extended in place.
        demo: HTML text expected to contain a ``<tbody>`` element.

    Returns:
        The same *ulist* object. It is returned unchanged when *demo*
        contains no ``<tbody>``.
    """
    soup = BeautifulSoup(demo, "html.parser")
    tbody = soup.find("tbody")
    if tbody is None:
        # No table in the document (e.g. the download failed).
        return ulist
    for tr in tbody.children:
        # Children also include text nodes between rows; only tags are rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        ulist.append([p.get_text() for p in tr.find_all("p")])
    return ulist


def print_info(ulist: list, num: int) -> None:
    """Print the first three fields of up to *num* rows as aligned columns.

    Args:
        ulist: Rows as produced by ``get_usefuldata``.
        num: Maximum number of rows to look at; fewer are printed when
            *ulist* is shorter.
    """
    for u in ulist[:max(num, 0)]:
        if len(u) < 3:
            # Rows without three text cells cannot fill the format string.
            continue
        print("{:<8}{:<12}{:^8}".format(u[0], u[1], u[2]))


def main() -> None:
    """Fetch the page, extract the table rows and print the first 150."""
    uinfo = []
    html = gethtml()
    get_usefuldata(uinfo, html)
    print_info(uinfo, 150)


if __name__ == "__main__":
    main()
Python爬虫,爬取软科大学排名
最新推荐文章于 2023-02-25 00:04:19 发布