一、爬取京东手机商品的用户评价,包括评价内容、颜色、手机型号,并存入数据库(MySQL)
二、数据库表结构
三、代码
import requests
import time
import json
from pymysql import *
def mes(productId, page):
    """Fetch one page of comments for a JD product and return the decoded JSON.

    The endpoint answers with JSONP, i.e. the JSON document wrapped in
    ``fetchJSON_comment98( ... );``. Only that outer wrapper is removed
    before decoding.

    Args:
        productId: Product id substituted into the ``productId`` query field.
        page: Page index substituted into the ``page`` query field.

    Returns:
        The decoded JSON payload (whatever ``json.loads`` produces).

    Raises:
        requests.RequestException: On network failure or timeout.
        ValueError: If the unwrapped body is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'}
    url = 'https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&productId={}&score=0&sortType=5&page={}&pageSize=10&isShadowSku=0&fold=1'.format(productId, page)
    # Bound the wait so a stalled connection cannot hang the scraper forever.
    resp = requests.get(url, headers=headers, timeout=10)

    body = resp.text.strip()
    # Strip the JSONP wrapper only at the two ends of the body. A global
    # str.replace would also delete any "fetchJSON_comment98(" or ");" that
    # happens to appear inside a user's review text and corrupt the payload.
    prefix = 'fetchJSON_comment98('
    if body.startswith(prefix):
        body = body[len(prefix):]
        if body.endswith(');'):
            body = body[:-2]
        elif body.endswith(')'):
            body = body[:-1]
    return json.loads(body)
def getPage(productId):
    """Return the ``maxPage`` field reported on the first comment page.

    Args:
        productId: Product id forwarded to ``mes``.

    Returns:
        The value stored under ``'maxPage'`` in the page-0 response.
    """
    # Page 0 is requested purely to read the paging metadata.
    return mes(productId, 0)['maxPage']
def insert(db, list):
    """Bulk-insert rows into the ``pingjia`` table in a single transaction.

    Args:
        db: An open database connection exposing ``cursor()``, ``commit()``
            and ``rollback()``.
        list: Iterable of 3-item rows, one value per ``%s`` placeholder.
            (The parameter name shadows the builtin; it is kept so existing
            callers keep working.)

    Returns:
        None. A failure while executing or committing is printed and the
        transaction is rolled back rather than raised.
    """
    # The leading literal 0 fills the table's first column
    # (presumably an auto-increment id -- confirm against the schema).
    sql = "INSERT INTO pingjia values (0,%s,%s,%s)"
    cursor = db.cursor()
    rows = tuple(list)
    print(rows)
    try:
        cursor.executemany(sql, rows)
        db.commit()
        # Report success only once the commit has actually gone through.
        print("插入成功")
    except Exception as e:
        print(e)
        db.rollback()
    finally:
        # Release the cursor on both the success and the failure path.
        cursor.close()
def getData(productId):
    """Scrape all comment pages of a product and store the rows in MySQL.

    For every comment the tuple ``(content, productColor, referenceName)``
    is collected; the whole batch is then written with ``insert``.

    Args:
        productId: Product id forwarded to ``getPage`` and ``mes``.

    Returns:
        None.
    """
    maxpage = getPage(productId)
    print(maxpage)

    rows = []
    # NOTE(review): pages 0..maxpage inclusive are requested, exactly as
    # before. If the API's maxPage is a page *count*, the last request is
    # one past the end -- confirm against real responses.
    for page in range(0, maxpage + 1):
        mess = mes(productId, page)
        # Tolerate a page whose 'comments' entry is absent or null instead
        # of aborting the whole run.
        for item in mess.get('comments') or []:
            rows.append(
                (item['content'], item['productColor'], item['referenceName'])
            )

    db = connectDB()
    if db is None:
        # No usable connection; there is nowhere to write the rows.
        return
    try:
        insert(db, rows)
    finally:
        # This function opened the connection, so it is responsible for
        # closing it.
        db.close()
# Open a connection to the MySQL database.
def connectDB(host='localhost', port=3306, user='root', password='123456', db='mmm'):
    """Connect to MySQL and return the connection, or ``None`` on failure.

    Called without arguments it behaves as before and uses the built-in
    local settings.

    Args:
        host: Server host name.
        port: Server TCP port.
        user: Login user name.
        password: Login password.
            NOTE(review): the default credential is hard-coded in source;
            consider loading it from configuration or the environment.
        db: Name of the database to select.

    Returns:
        The connection object, or ``None`` if connecting raised; the error
        is printed in that case.
    """
    try:
        conn = connect(host=host, port=port, user=user, password=password, db=db)
        print("数据库连接成功")
        return conn
    except Exception as e:
        print(e)
        # Python has no NULL; the original `return NULL` itself raised
        # NameError instead of signalling failure to the caller.
        return None
if __name__ == '__main__':
    # Script entry point: scrape and store the comments of one product id.
    product_id = 100011924558
    getData(product_id)
四、结果