import requests
from bs4 import BeautifulSoup, builder
import pymysql
def download(url, timeout=10):
    """Fetch *url* with a desktop-browser User-Agent and return the Response.

    Args:
        url: page URL to request.
        timeout: seconds before giving up; without one, requests waits
            forever on a stalled server (new parameter, default keeps the
            call signature backward-compatible).

    Returns:
        The raw ``requests.Response`` (not decoded/checked here; the caller
        sets ``encoding`` itself).
    """
    # Spoofed UA so the ranking site serves the normal browser page.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36 Edg/95.0.1020.40"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    return response
# Parse the ranking page.
def bs(url):
    """Scrape the university-ranking table at *url*.

    Returns:
        A list of dicts, one per table row, with keys
        ``province``, ``score``, ``rank``, ``name``, ``type``.
    """
    html_text = download(url)
    # Let requests sniff the real charset from the body so Chinese text
    # decodes correctly (the header charset is often wrong/missing).
    html_text.encoding = html_text.apparent_encoding
    soup = BeautifulSoup(html_text.text, 'lxml')
    # Rows of the ranking table.
    sapList = soup.select('#content-box .rk-table tbody>tr')
    allsaps = []
    for saptag in sapList:
        # Hoist the <td> lookup: the original re-ran select('td') for every
        # field, querying the row's subtree three times instead of once.
        tds = saptag.select('td')
        sap_dict = {
            # Column positions assumed from the 2020 page layout — TODO
            # confirm if the site markup changes.
            'province': tds[2].get_text().strip(),
            'score': tds[4].get_text().strip(),
            'rank': saptag.select('div')[0].get_text().strip(),
            'name': saptag.select('a')[0].get_text(),
            'type': tds[3].get_text().strip(),
        }
        allsaps.append(sap_dict)
    print(allsaps)
    return allsaps
def get_config(host, user, password, db):
    """Bundle MySQL connection parameters into a dict for pymysql.

    Args:
        host: database server host name.
        user: login user.
        password: login password.
        db: database (schema) name.

    Returns:
        A dict with keys ``host``, ``user``, ``password``, ``db`` suitable
        for ``pymysql.Connect(**config)``.
    """
    keys = ('host', 'user', 'password', 'db')
    return dict(zip(keys, (host, user, password, db)))
def getConn(allsaps):
    """Insert the scraped rows into the MySQL table ``rank``.

    Args:
        allsaps: list of dicts as produced by :func:`bs`, with keys
            ``rank``, ``name``, ``province``, ``type``, ``score``.

    Fixes over the original:
      * Parameterized query instead of string concatenation — the old code
        was SQL-injectable and broke on any value containing a quote.
      * ``rank`` is a reserved word in MySQL 8+, so the table name is
        backquoted.
      * Connection/cursor are closed even if an insert raises; one commit
        after the loop makes the batch atomic.
      * No local named ``type`` (shadowed the builtin).
    """
    db_config = get_config('localhost', 'root', 'admin', 'python')
    conn = pymysql.Connect(**db_config)
    sql = ("INSERT INTO `rank` (num, schname, province, type, score) "
           "VALUES (%s, %s, %s, %s, %s)")
    try:
        with conn.cursor() as cur:
            for allsap in allsaps:
                cur.execute(sql, (allsap['rank'], allsap['name'],
                                  allsap['province'], allsap['type'],
                                  allsap['score']))
        conn.commit()
    finally:
        conn.close()
    print("操作完成")
if __name__ == "__main__":
    url = "https://www.shanghairanking.cn/rankings/bcur/2020"
    # Renamed from `all`, which shadowed the builtin of the same name.
    universities = bs(url)
    getConn(universities)