完整代码如下:
import MySQLdb
from bs4 import BeautifulSoup
import requests
import re
# Connect to the database (host, user name, password, database name).
db = MySQLdb.connect(
    host='localhost',
    user='root',
    passwd='123456',
    db='python',
    charset="utf8",
)
print("数据库连接成功")
# Obtain a cursor from the open connection.
cur = db.cursor()
def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    The request is sent with a desktop Firefox ``user-agent`` header and a
    30-second timeout. Before the body is decoded, the response encoding is
    replaced with ``apparent_encoding``.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or the sentinel string ``"-1"`` when the
        request fails or the server replies with an HTTP error status.
    """
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; rv:40.0) Gecko/20100101 Firefox/40.0'}
    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.RequestException:
        # Only request-level failures (connection, timeout, bad URL, HTTP
        # error status) map to the sentinel; a bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit and hide programming errors.
        return "-1"
    response.encoding = response.apparent_encoding
    return response.text
def parsePage(ilt,html):
try:
soup=BeautifulSoup(html,'html.parser')
plt=soup.find_all('div',attrs={'class':'p-price'})#提取所有class为p-price的标签
tlt=soup.find_all('div',attrs={'class':'p-name p-name-ty