在终端显示前400名大学
import requests
from bs4 import BeautifulSoup
import bs4
import re
def getHTMLtext(url):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or an empty string when the request fails
        or the server answers with an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=30)
        # raise_for_status has to be *called*; a bare attribute reference
        # is a no-op and lets 4xx/5xx error pages through as valid HTML.
        r.raise_for_status()
        # Use the encoding detected from the body rather than the one
        # guessed from the headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: callers treat '' as "nothing downloaded".
        return ''
def fillUnivList(ulist, html):
    """Append [rank, name, score] rows parsed from *html* to *ulist*.

    The first, second and fourth ``<td>`` of every ``<tr>`` directly under
    the first ``<tbody>`` are taken as rank, school name and total score.

    Args:
        ulist: List that is extended in place with one 3-item list per row.
        html: Page source to parse; may be empty.
    """
    soup = BeautifulSoup(html, 'html.parser')
    tbody = soup.find('tbody')
    if tbody is None:
        # Empty or unexpected page (e.g. the download failed): nothing to add.
        return
    for tr in tbody.children:
        # .children also yields whitespace text nodes; only real tags are rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')  # calling a tag is shorthand for find_all
        if len(tds) < 4:
            # Skip rows that do not have the columns indexed below.
            continue
        ulist.append([tds[0].string, tds[1].string, tds[3].string])
def printUnivList(ulist, num):
    """Print a header line followed by at most *num* rows of *ulist*.

    Args:
        ulist: Sequence of rows; the first three items of each row are
            printed as rank, school name and total score.
        num: Maximum number of rows to print. Fewer rows are printed when
            *ulist* is shorter; zero or a negative value prints only the
            header.
    """
    tplt = "{0:^10}\t{1:{3}^10}\t{2:^10}"
    # chr(12288) is the full-width (CJK) space; padding the name column with
    # it keeps Chinese text aligned.
    pad = chr(12288)
    print(tplt.format("排名", "学校名称", "总分", pad))
    # Slicing instead of range(num) avoids IndexError on a short list;
    # max() keeps a negative num from slicing from the end.
    for row in ulist[:max(num, 0)]:
        # A missing cell arrives as None, which cannot take a '^10' format
        # spec, so it is shown as an empty field.
        rank, name, score = ("" if cell is None else cell for cell in row[:3])
        print(tplt.format(rank, name, score, pad))
def main():
    """Fetch the hard-coded ranking page, parse it and print 400 rows."""
    ranking_url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2019.html'
    rows = []
    # Download and parse in one step; fillUnivList fills `rows` in place.
    fillUnivList(rows, getHTMLtext(ranking_url))
    printUnivList(rows, 400)


main()
运行结果:
将前400名大学存入MySQL
创建存放数据的MySQL表:
-- Create the `university` table that receives the scraped rows:
-- id (int), name (char(30)) and score (float), every column NOT NULL.
-- NOTE(review): the leading "->" look like mysql-client continuation prompts
-- from a console paste; presumably they must be dropped before running this
-- as a script -- confirm.
create table university(
-> id int(5) not null,
-> name char(30) not null,
-> score float(5) not null);
import requests
from bs4 import BeautifulSoup
import bs4
import re
import pymysql
class university():
    """Scrape a ranking page and store its rows in the MySQL table `university`.

    Attributes are set in ``__init__``: ``url`` (page to download), ``ulist``
    (scraped [rank, name, score] rows), ``num`` (maximum number of rows to
    store) and ``db`` (open pymysql connection).
    """

    def __init__(self, url, num):
        """Remember the target *url* and row limit *num* and open the DB connection."""
        self.url = url
        self.ulist = []
        self.num = num
        # NOTE(review): connection settings and credentials are hard-coded;
        # consider loading them from configuration or the environment.
        self.db = pymysql.connect(host='127.0.0.1', port=3306, user='root', password='forever1122', db='python')

    def getHTMLtext(self):
        """Download ``self.url`` and return its text, or '' on any request error."""
        try:
            r = requests.get(self.url, timeout=30)
            # raise_for_status has to be *called*; a bare attribute reference
            # is a no-op and lets 4xx/5xx error pages through as valid HTML.
            r.raise_for_status()
            r.encoding = r.apparent_encoding
            return r.text
        except requests.RequestException:
            # Best-effort fetch: callers treat '' as "nothing downloaded".
            return ''

    def fillUnivList(self):
        """Download the page and append [rank, name, score] rows to ``self.ulist``.

        The first, second and fourth ``<td>`` of every ``<tr>`` directly
        under the first ``<tbody>`` are used. Nothing is appended when the
        page has no ``<tbody>`` (for example after a failed download).
        """
        html = self.getHTMLtext()
        soup = BeautifulSoup(html, 'html.parser')
        tbody = soup.find('tbody')
        if tbody is None:
            return
        for tr in tbody.children:
            # .children also yields whitespace text nodes; only tags are rows.
            if not isinstance(tr, bs4.element.Tag):
                continue
            tds = tr('td')  # calling a tag is shorthand for find_all
            if len(tds) < 4:
                # Skip rows that do not have the columns indexed below.
                continue
            self.ulist.append([tds[0].string, tds[1].string, tds[3].string])

    def printUnivList(self):
        """Insert at most ``self.num`` scraped rows into the `university` table.

        Despite its name this method writes to the database; it prints only
        a completion message at the end.
        """
        # Placeholders let the driver quote the values, so a school name
        # containing quotes can neither break nor alter the statement.
        sql = 'insert into university (id,name,score) values (%s,%s,%s)'
        # Slicing instead of range(num) avoids IndexError on a short list;
        # max() keeps a negative num from slicing from the end.
        for row in self.ulist[:max(self.num, 0)]:
            # Plain str values; a missing cell stays None (SQL NULL).
            rank, name, score = (None if cell is None else str(cell) for cell in row[:3])
            self.write_to_mysql(sql, (rank, name, score))
        print("输入完成!!!!!")

    def write_to_mysql(self, sql, params=None):
        """Execute *sql* on ``self.db`` and commit; print the error and roll back on failure.

        Args:
            sql: Statement to run, optionally containing ``%s`` placeholders.
            params: Values bound to the placeholders; ``None`` runs *sql* as is.
        """
        cursor = self.db.cursor()
        try:
            cursor.execute(sql, params)
            self.db.commit()
        except Exception as e:
            # One bad row must not abort the whole import: report and undo.
            print(e)
            self.db.rollback()
        finally:
            cursor.close()
if __name__=="__main__":
    # Script entry point: scrape the ranking page and store up to 400 rows.
    un = university('http://www.zuihaodaxue.cn/zuihaodaxuepaiming2019.html',400)
    try:
        # fillUnivList() downloads the page itself, so a separate
        # getHTMLtext() call here would only repeat the HTTP request and
        # throw the result away.
        un.fillUnivList()
        un.printUnivList()
    finally:
        # Release the MySQL connection opened in __init__.
        un.db.close()
运行结果: