# 牛肉评论 (beef reviews)
# https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&productId=100025128936&score=0&sortType=5&page=0&pageSize=10&isShadowSku=0&fold=1
import re
import requests
import csv
import random
import time
import re
# HTTP request headers passed to requests.get(..., headers=headers) below.
# 'User-Agent' is a fixed desktop Chrome 104 string.
# NOTE(review): 'Cookie' is a hard-coded cookie string that looks like a
# captured jd.com browser session - presumably it expires, and credentials
# should not be committed to source; confirm and consider loading it from the
# environment instead.
headers={
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
'Cookie':'unpl=JF8EAK1nNSttWRgGARMGSBIXGFhcW1pcGERWZ2FWXQ8ITgMBHAQcEkN7XlVdXhRKHx9ubhRVXVNKXQ4aCysSEXteXVdZDEsWC2tXVgQFDQ8VXURJQlZAFDNVCV9dSRZRZjJWBFtdT1xWSAYYRRMfDlAKDlhCR1FpMjVkXlh7VAQrAh0TEU5cU1tbOHsQM19XAlFaXEtVNRoyGiJSHwFdXVUKTBROb2EEVVhZTFEDKwMrEQ; __jdu=170040647; areaId=13; PCSYCityID=CN_370000_370900_0; shshshfpa=d13cd1dc-5642-0f9b-e976-ec229feb579d-1662570491; shshshfpb=qzRhxau6Qqa2HQrqI5HSfpQ; __jdc=122270672; __jdv=122270672|baidu|-|organic|notset|1662570520701; shshshfp=2aff9b962254e92011938f034976e9b4; ip_cityCode=1112; ipLoc-djd=13-1112-46665-46698; jwotest_product=99; mt_xid=V2_52007VwMUU1xdU10aTylbB24BQlcKUE5ZGk9NQAAzBxJODllUCQNKSglQNVNCBQpaVVMvShhfBnsCGk5cUUNbH0IZWg5lASJQbVhiUxlOHVUHYwQbUl1ZUVkdQRxZB1cDFFZZ; jsavif=1; __jda=122270672.170040647.1662570487.1662570521.1662614449.3; JSESSIONID=E4183D4825A3F25B79066C6FAB065E13.s1; token=631272b9b7166cb267e9a2f9907da026,2,923675; __tk=f37514b34c6ce7e350f7e33722bc6f58,2,923675; shshshsID=daa07f7ca60bb8ada946f160dad78a29_3_1662615826441; __jdb=122270672.3.170040647|3.1662614449; 3AB9D23F7A4B3C9B=3G7NRO6TRKPOSZN7QIQA75I36DUPPJUCM735CSVTFQHNTIPGFBRM42YWWAJU53IBGTMCBX5TUVJDGS5CWSS3RXD2YQ'
}
# Crawl JD.com review pages for every SKU in `zs` and append one CSV row per
# review to 牛肉评论.csv.
# NOTE(review): the source had lost its indentation; the nesting below (one
# request and one sleep per page, one CSV row per review) is reconstructed
# from the data flow - confirm it matches the intended structure.
zs = ['100009161505', '100009235785', '100016273168', '100009235831', '100018175162', '100009307855', '100010205682', '100011848502', '100011848488', '100015445637', '100015445663', '2154645', '3926460', '100031452532', '100031452478']

# `with` closes (and flushes) the CSV even if a request or JSON decode raises
# mid-crawl; the original handle was never closed.
# errors='replace' stops a single character that GBK cannot encode (e.g. an
# emoji in a review) from aborting the run with UnicodeEncodeError.
with open('牛肉评论.csv', mode='a', encoding='gbk', errors='replace', newline='') as f:
    csv_writer = csv.writer(f)
    # Header row; it is written on every run because the file is opened in append mode.
    csv_writer.writerow(['用户名称', '用户头像', '评分', '评论内容', '评论时间', '点赞数', '评论回复人数', '商品属性', '页面标题', '评论照片链接', '评论视频链接', 'sku', '好评度小数表示', '好评度百分比表示', '全部评价数', '视频晒单数', '追评数', '好评数', '中评数', '差评数', '评价关键词'])
    for i in zs:
        print(f"--------------------正在爬取{i}网址-----------------------")
        for page in range(100):
            print(f"-------------------正在爬取第{page}页----------------------")
            url = f'https://club.jd.com/comment/productPageComments.action?productId={i}&score=0&sortType=5&page={page}&pageSize=10&isShadowSku=0&fold=1'
            response = requests.get(url=url, headers=headers)
            json_data = response.json()
            comment_list = json_data['comments']
            if comment_list:
                # Product-level figures are the same for every review on the
                # page, so read them once per page instead of once per review.
                # Guarded by `if comment_list` so an empty page touches no
                # extra keys, exactly as before.
                productCommentSummary = json_data['productCommentSummary']
                productId = productCommentSummary['productId']  # sku
                goodRate = productCommentSummary['goodRate']  # positive-review rate as a decimal
                goodRateShow = str(productCommentSummary['goodRateShow']) + '%'  # positive-review rate as a percentage
                commentCountStr = productCommentSummary['commentCountStr']  # total number of reviews
                videoCountStr = productCommentSummary['videoCountStr']  # number of video reviews
                afterCountStr = productCommentSummary['afterCountStr']  # number of follow-up reviews
                goodCountStr = productCommentSummary['goodCountStr']  # number of positive reviews
                generalCountStr = productCommentSummary['generalCountStr']  # number of neutral reviews
                poorCountStr = productCommentSummary['poorCountStr']  # number of negative reviews
                # Review keyword tags rendered as "name(count)", space separated.
                keywords = ' '.join(
                    f"{tag['name']}({tag['count']})"
                    for tag in json_data['hotCommentTagStatistics']
                )
            for comment in comment_list:
                nickname = comment['nickname']  # user name
                userImageUrl = 'https://' + str(comment['userImageUrl'])  # user avatar
                score = comment['score']  # rating
                content = re.sub(r'[\t\n]', '', comment['content'])  # review text, tabs/newlines stripped
                creationTime = comment['creationTime']  # review time
                usefulVoteCount = comment['usefulVoteCount']  # number of likes
                replyCount = comment['replyCount']  # number of replies
                productColor = comment['productColor']  # product attribute
                referenceName = comment['referenceName']  # page title

                # Reset per review: previously a review without media either
                # raised NameError (first row) or silently reused the previous
                # review's links.
                imgUrl = ''
                remark = ''
                try:
                    for zp in comment['images']:
                        imgUrl = 'https:' + str(zp['imgUrl'])  # last photo link wins
                except (KeyError, TypeError):
                    pass  # no usable photo list on this review; keep what was collected
                try:
                    for lj in comment['videos']:
                        remark = lj['remark']  # last video's 'remark' field wins
                except (KeyError, TypeError):
                    pass  # no usable video list on this review; keep what was collected

                csv_writer.writerow([
                    nickname, userImageUrl, score, content, creationTime,
                    usefulVoteCount, replyCount, productColor, referenceName,
                    imgUrl, remark, productId, goodRate, goodRateShow,
                    commentCountStr, videoCountStr, afterCountStr, goodCountStr,
                    generalCountStr, poorCountStr, keywords,
                ])
            # Random pause of up to 2 s (two decimals) between page requests.
            time.sleep(round(random.uniform(0, 2), 2))
# 猪肉评论 (pork reviews)
# https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&productId=100025128936&score=0&sortType=5&page=0&pageSize=10&isShadowSku=0&fold=1
import re
import requests
import csv
import random
import time
import re
# HTTP request headers passed to requests.get(..., headers=headers) below.
# 'User-Agent' is a fixed desktop Chrome 104 string.
# NOTE(review): 'Cookie' is a hard-coded cookie string that looks like a
# captured jd.com browser session - presumably it expires, and credentials
# should not be committed to source; confirm and consider loading it from the
# environment instead.
headers={
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
'Cookie':'unpl=JF8EAK1nNSttWRgGARMGSBIXGFhcW1pcGERWZ2FWXQ8ITgMBHAQcEkN7XlVdXhRKHx9ubhRVXVNKXQ4aCysSEXteXVdZDEsWC2tXVgQFDQ8VXURJQlZAFDNVCV9dSRZRZjJWBFtdT1xWSAYYRRMfDlAKDlhCR1FpMjVkXlh7VAQrAh0TEU5cU1tbOHsQM19XAlFaXEtVNRoyGiJSHwFdXVUKTBROb2EEVVhZTFEDKwMrEQ; __jdu=170040647; areaId=13; PCSYCityID=CN_370000_370900_0; shshshfpa=d13cd1dc-5642-0f9b-e976-ec229feb579d-1662570491; shshshfpb=qzRhxau6Qqa2HQrqI5HSfpQ; __jdc=122270672; __jdv=122270672|baidu|-|organic|notset|1662570520701; shshshfp=2aff9b962254e92011938f034976e9b4; ip_cityCode=1112; ipLoc-djd=13-1112-46665-46698; jwotest_product=99; mt_xid=V2_52007VwMUU1xdU10aTylbB24BQlcKUE5ZGk9NQAAzBxJODllUCQNKSglQNVNCBQpaVVMvShhfBnsCGk5cUUNbH0IZWg5lASJQbVhiUxlOHVUHYwQbUl1ZUVkdQRxZB1cDFFZZ; jsavif=1; __jda=122270672.170040647.1662570487.1662570521.1662614449.3; JSESSIONID=E4183D4825A3F25B79066C6FAB065E13.s1; token=631272b9b7166cb267e9a2f9907da026,2,923675; __tk=f37514b34c6ce7e350f7e33722bc6f58,2,923675; shshshsID=daa07f7ca60bb8ada946f160dad78a29_3_1662615826441; __jdb=122270672.3.170040647|3.1662614449; 3AB9D23F7A4B3C9B=3G7NRO6TRKPOSZN7QIQA75I36DUPPJUCM735CSVTFQHNTIPGFBRM42YWWAJU53IBGTMCBX5TUVJDGS5CWSS3RXD2YQ'
}
# Crawl JD.com review pages for every SKU in `zs` and append one CSV row per
# review to 猪肉评论新.csv.
# NOTE(review): the source had lost its indentation; the nesting below (one
# request and one sleep per page, one CSV row per review) is reconstructed
# from the data flow - confirm it matches the intended structure.
zs = ['100024004047', '100024004049', '100024004053', '100024004055', '68204751824', '11597710970', '100008118481', '100008118487', '100008118503', '100008118501', '100008118493', '100008118511', '100014591902', '100014591908']

# `with` closes (and flushes) the CSV even if a request or JSON decode raises
# mid-crawl; the original handle was never closed.
# errors='replace' stops a single character that GBK cannot encode (e.g. an
# emoji in a review) from aborting the run with UnicodeEncodeError.
with open('猪肉评论新.csv', mode='a', encoding='gbk', errors='replace', newline='') as f:
    csv_writer = csv.writer(f)
    # Header row; it is written on every run because the file is opened in append mode.
    csv_writer.writerow(['用户名称', '用户头像', '评分', '评论内容', '评论时间', '点赞数', '评论回复人数', '商品属性', '页面标题', '评论照片链接', '评论视频链接', 'sku', '好评度小数表示', '好评度百分比表示', '全部评价数', '视频晒单数', '追评数', '好评数', '中评数', '差评数', '评价关键词'])
    for i in zs:
        print(f"--------------------正在爬取{i}网址-----------------------")
        for page in range(100):
            print(f"-------------------正在爬取第{page}页----------------------")
            url = f'https://club.jd.com/comment/productPageComments.action?productId={i}&score=0&sortType=5&page={page}&pageSize=10&isShadowSku=0&fold=1'
            response = requests.get(url=url, headers=headers)
            json_data = response.json()
            comment_list = json_data['comments']
            if comment_list:
                # Product-level figures are the same for every review on the
                # page, so read them once per page instead of once per review.
                # Guarded by `if comment_list` so an empty page touches no
                # extra keys, exactly as before.
                productCommentSummary = json_data['productCommentSummary']
                productId = productCommentSummary['productId']  # sku
                goodRate = productCommentSummary['goodRate']  # positive-review rate as a decimal
                goodRateShow = str(productCommentSummary['goodRateShow']) + '%'  # positive-review rate as a percentage
                commentCountStr = productCommentSummary['commentCountStr']  # total number of reviews
                videoCountStr = productCommentSummary['videoCountStr']  # number of video reviews
                afterCountStr = productCommentSummary['afterCountStr']  # number of follow-up reviews
                goodCountStr = productCommentSummary['goodCountStr']  # number of positive reviews
                generalCountStr = productCommentSummary['generalCountStr']  # number of neutral reviews
                poorCountStr = productCommentSummary['poorCountStr']  # number of negative reviews
                # Review keyword tags rendered as "name(count)", space separated.
                keywords = ' '.join(
                    f"{tag['name']}({tag['count']})"
                    for tag in json_data['hotCommentTagStatistics']
                )
            for comment in comment_list:
                nickname = comment['nickname']  # user name
                userImageUrl = 'https://' + str(comment['userImageUrl'])  # user avatar
                score = comment['score']  # rating
                content = re.sub(r'[\t\n]', '', comment['content'])  # review text, tabs/newlines stripped
                creationTime = comment['creationTime']  # review time
                usefulVoteCount = comment['usefulVoteCount']  # number of likes
                replyCount = comment['replyCount']  # number of replies
                productColor = comment['productColor']  # product attribute
                referenceName = comment['referenceName']  # page title

                # Reset per review: previously a review without media either
                # raised NameError (first row) or silently reused the previous
                # review's links.
                imgUrl = ''
                remark = ''
                try:
                    for zp in comment['images']:
                        imgUrl = 'https:' + str(zp['imgUrl'])  # last photo link wins
                except (KeyError, TypeError):
                    pass  # no usable photo list on this review; keep what was collected
                try:
                    for lj in comment['videos']:
                        remark = lj['remark']  # last video's 'remark' field wins
                except (KeyError, TypeError):
                    pass  # no usable video list on this review; keep what was collected

                csv_writer.writerow([
                    nickname, userImageUrl, score, content, creationTime,
                    usefulVoteCount, replyCount, productColor, referenceName,
                    imgUrl, remark, productId, goodRate, goodRateShow,
                    commentCountStr, videoCountStr, afterCountStr, goodCountStr,
                    generalCountStr, poorCountStr, keywords,
                ])
            # Random pause of up to 2 s (two decimals) between page requests.
            time.sleep(round(random.uniform(0, 2), 2))
# 羊肉评论 (mutton reviews)
# https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&productId=100025128936&score=0&sortType=5&page=0&pageSize=10&isShadowSku=0&fold=1
import re
import requests
import csv
import random
import time
import re
# HTTP request headers passed to requests.get(..., headers=headers) below.
# 'User-Agent' is a fixed desktop Chrome 104 string.
# NOTE(review): 'Cookie' is a hard-coded cookie string that looks like a
# captured, logged-in jd.com browser session (it carries pin/thor entries) -
# presumably it expires, and credentials should not be committed to source;
# confirm and consider loading it from the environment instead.
headers={
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
'Cookie':'unpl=JF8EAK1nNSttWRgGARMGSBIXGFhcW1pcGERWZ2FWXQ8ITgMBHAQcEkN7XlVdXhRKHx9ubhRVXVNKXQ4aCysSEXteXVdZDEsWC2tXVgQFDQ8VXURJQlZAFDNVCV9dSRZRZjJWBFtdT1xWSAYYRRMfDlAKDlhCR1FpMjVkXlh7VAQrAh0TEU5cU1tbOHsQM19XAlFaXEtVNRoyGiJSHwFdXVUKTBROb2EEVVhZTFEDKwMrEQ; __jdu=170040647; areaId=13; PCSYCityID=CN_370000_370900_0; shshshfpa=d13cd1dc-5642-0f9b-e976-ec229feb579d-1662570491; shshshfpb=qzRhxau6Qqa2HQrqI5HSfpQ; __jdc=122270672; __jdv=122270672|baidu|-|organic|notset|1662570520701; shshshfp=2aff9b962254e92011938f034976e9b4; ip_cityCode=1112; ipLoc-djd=13-1112-46665-46698; mt_xid=V2_52007VwMUU1xdU10aTykJBGEDE1VeX04ISkwYQAA0UBdOVV4FCANOEVkNN1AbBV9YAA4vShhfBnsCGk5cUUNZHkIcXw5jByJQbVhiXRlJGl8CbgITUm1YVF4b; __jda=122270672.170040647.1662570487.1662638740.1662649143.6; mba_muid=170040647; jsavif=1; wlfstk_smdl=3xrhic2zyiy1m4haz2z82lt1ce4gptlt; TrackID=12sC0yMovK7SLxiG7KVpywHWXbK7ycxX-zPQ4W1ayO5In1o6MR5gZIFZUafxeWT_18tnyj6qVykt69fouAAdFZ4f8cCAO0bHwy-7jv89ucrc; thor=6C6F8D919DE139ECE11D28FFF3E5EC84273A857577959D0B03CF0F44FD7D58194A99992F4F5ABEE3C2C43FFBC769D73B6155388EE2BF8FBF8D7C0889F8693448BE7F09429E93F4007D9ECB6ED6E3BBEF9F756043062DF85FA13A3C9F4E957630FF6F10BEA3CE9758A3890C3FFD74959D539A518F5A158853FD57853BA794F21AC2074AEC08B657B3FEC70C83D752F8A0193D3EC703629B1AB38335E42AEE0DA6; pinId=fs_MVnVFxUehygdImyF_b7V9-x-f3wj7; pin=jd_46cd846c5dded; unick=jd_46cd846c5dded; ceshi3.com=000; _tp=uy5RB0M89VjLDNQ7XsGqh5lPpymDUPR0iKlt%2FBf8bEI%3D; _pst=jd_46cd846c5dded; token=2780e6694f589c8dafa7dd0e349b400a,3,923695; __tk=qux31D2YKzxFrDM32cxnrASA2wbt1zM5rYsDqzbtrUTsKwbY1DaDrn,3,923695; 3AB9D23F7A4B3C9B=3G7NRO6TRKPOSZN7QIQA75I36DUPPJUCM735CSVTFQHNTIPGFBRM42YWWAJU53IBGTMCBX5TUVJDGS5CWSS3RXD2YQ; shshshsID=50bb9d1cc99e76bac5444016311ec585_14_1662652174060; __jdb=122270672.23.170040647|6.1662649143'
}
# Crawl JD.com review pages for every SKU in `zs` and append one CSV row per
# review to 羊肉评论新.csv.
# NOTE(review): the source had lost its indentation; the nesting below (one
# request and one sleep per page, one CSV row per review) is reconstructed
# from the data flow - confirm it matches the intended structure.
zs = ['100016753390', '100016753392', '100017741418', '100034360825', '100023134332', '100021494638', '100000597004', '100022399502', '57218917697', '65891661893', '72295690963', '100022910853', '20676871318', '100000679588', '100031630216']

# `with` closes (and flushes) the CSV even if a request or JSON decode raises
# mid-crawl; the original handle was never closed.
# errors='replace' stops a single character that GBK cannot encode (e.g. an
# emoji in a review) from aborting the run with UnicodeEncodeError.
with open('羊肉评论新.csv', mode='a', encoding='gbk', errors='replace', newline='') as f:
    csv_writer = csv.writer(f)
    # Header row; it is written on every run because the file is opened in append mode.
    csv_writer.writerow(['用户名称', '用户头像', '评分', '评论内容', '评论时间', '点赞数', '评论回复人数', '商品属性', '页面标题', '评论照片链接', '评论视频链接', 'sku', '好评度小数表示', '好评度百分比表示', '全部评价数', '视频晒单数', '追评数', '好评数', '中评数', '差评数', '评价关键词'])
    for i in zs:
        print(f"--------------------正在爬取{i}网址-----------------------")
        for page in range(100):
            print(f"-------------------正在爬取第{page}页----------------------")
            url = f'https://club.jd.com/comment/productPageComments.action?productId={i}&score=0&sortType=5&page={page}&pageSize=10&isShadowSku=0&fold=1'
            response = requests.get(url=url, headers=headers)
            json_data = response.json()
            comment_list = json_data['comments']
            if comment_list:
                # Product-level figures are the same for every review on the
                # page, so read them once per page instead of once per review.
                # Guarded by `if comment_list` so an empty page touches no
                # extra keys, exactly as before.
                productCommentSummary = json_data['productCommentSummary']
                productId = productCommentSummary['productId']  # sku
                goodRate = productCommentSummary['goodRate']  # positive-review rate as a decimal
                goodRateShow = str(productCommentSummary['goodRateShow']) + '%'  # positive-review rate as a percentage
                commentCountStr = productCommentSummary['commentCountStr']  # total number of reviews
                videoCountStr = productCommentSummary['videoCountStr']  # number of video reviews
                afterCountStr = productCommentSummary['afterCountStr']  # number of follow-up reviews
                goodCountStr = productCommentSummary['goodCountStr']  # number of positive reviews
                generalCountStr = productCommentSummary['generalCountStr']  # number of neutral reviews
                poorCountStr = productCommentSummary['poorCountStr']  # number of negative reviews
                # Review keyword tags rendered as "name(count)", space separated.
                keywords = ' '.join(
                    f"{tag['name']}({tag['count']})"
                    for tag in json_data['hotCommentTagStatistics']
                )
            for comment in comment_list:
                nickname = comment['nickname']  # user name
                userImageUrl = 'https://' + str(comment['userImageUrl'])  # user avatar
                score = comment['score']  # rating
                content = re.sub(r'[\t\n]', '', comment['content'])  # review text, tabs/newlines stripped
                creationTime = comment['creationTime']  # review time
                usefulVoteCount = comment['usefulVoteCount']  # number of likes
                replyCount = comment['replyCount']  # number of replies
                productColor = comment['productColor']  # product attribute
                referenceName = comment['referenceName']  # page title

                # Reset per review: previously a review without media either
                # raised NameError (first row) or silently reused the previous
                # review's links.
                imgUrl = ''
                remark = ''
                try:
                    for zp in comment['images']:
                        imgUrl = 'https:' + str(zp['imgUrl'])  # last photo link wins
                except (KeyError, TypeError):
                    pass  # no usable photo list on this review; keep what was collected
                try:
                    for lj in comment['videos']:
                        remark = lj['remark']  # last video's 'remark' field wins
                except (KeyError, TypeError):
                    pass  # no usable video list on this review; keep what was collected

                csv_writer.writerow([
                    nickname, userImageUrl, score, content, creationTime,
                    usefulVoteCount, replyCount, productColor, referenceName,
                    imgUrl, remark, productId, goodRate, goodRateShow,
                    commentCountStr, videoCountStr, afterCountStr, goodCountStr,
                    generalCountStr, poorCountStr, keywords,
                ])
            # Random pause of up to 2 s (two decimals) between page requests.
            time.sleep(round(random.uniform(0, 2), 2))
# 鸡肉评论 (chicken reviews)
# https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&productId=100025128936&score=0&sortType=5&page=0&pageSize=10&isShadowSku=0&fold=1
import re
import requests
import csv
import random
import time
import re
# HTTP request headers passed to requests.get(..., headers=headers) below.
# 'User-Agent' is a fixed desktop Chrome 104 string.
# NOTE(review): 'Cookie' is a hard-coded cookie string that looks like a
# captured, logged-in jd.com browser session (it carries pin/thor entries) -
# presumably it expires, and credentials should not be committed to source;
# confirm and consider loading it from the environment instead.
headers={
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
'Cookie':'unpl=JF8EAK1nNSttWRgGARMGSBIXGFhcW1pcGERWZ2FWXQ8ITgMBHAQcEkN7XlVdXhRKHx9ubhRVXVNKXQ4aCysSEXteXVdZDEsWC2tXVgQFDQ8VXURJQlZAFDNVCV9dSRZRZjJWBFtdT1xWSAYYRRMfDlAKDlhCR1FpMjVkXlh7VAQrAh0TEU5cU1tbOHsQM19XAlFaXEtVNRoyGiJSHwFdXVUKTBROb2EEVVhZTFEDKwMrEQ; __jdu=170040647; areaId=13; PCSYCityID=CN_370000_370900_0; shshshfpa=d13cd1dc-5642-0f9b-e976-ec229feb579d-1662570491; shshshfpb=qzRhxau6Qqa2HQrqI5HSfpQ; __jdc=122270672; __jdv=122270672|baidu|-|organic|notset|1662570520701; shshshfp=2aff9b962254e92011938f034976e9b4; ip_cityCode=1112; ipLoc-djd=13-1112-46665-46698; mt_xid=V2_52007VwMUU1xdU10aTykJBGEDE1VeX04ISkwYQAA0UBdOVV4FCANOEVkNN1AbBV9YAA4vShhfBnsCGk5cUUNZHkIcXw5jByJQbVhiXRlJGl8CbgITUm1YVF4b; __jda=122270672.170040647.1662570487.1662638740.1662649143.6; mba_muid=170040647; jsavif=1; wlfstk_smdl=3xrhic2zyiy1m4haz2z82lt1ce4gptlt; TrackID=12sC0yMovK7SLxiG7KVpywHWXbK7ycxX-zPQ4W1ayO5In1o6MR5gZIFZUafxeWT_18tnyj6qVykt69fouAAdFZ4f8cCAO0bHwy-7jv89ucrc; thor=6C6F8D919DE139ECE11D28FFF3E5EC84273A857577959D0B03CF0F44FD7D58194A99992F4F5ABEE3C2C43FFBC769D73B6155388EE2BF8FBF8D7C0889F8693448BE7F09429E93F4007D9ECB6ED6E3BBEF9F756043062DF85FA13A3C9F4E957630FF6F10BEA3CE9758A3890C3FFD74959D539A518F5A158853FD57853BA794F21AC2074AEC08B657B3FEC70C83D752F8A0193D3EC703629B1AB38335E42AEE0DA6; pinId=fs_MVnVFxUehygdImyF_b7V9-x-f3wj7; pin=jd_46cd846c5dded; unick=jd_46cd846c5dded; ceshi3.com=000; _tp=uy5RB0M89VjLDNQ7XsGqh5lPpymDUPR0iKlt%2FBf8bEI%3D; _pst=jd_46cd846c5dded; token=2780e6694f589c8dafa7dd0e349b400a,3,923695; __tk=qux31D2YKzxFrDM32cxnrASA2wbt1zM5rYsDqzbtrUTsKwbY1DaDrn,3,923695; 3AB9D23F7A4B3C9B=3G7NRO6TRKPOSZN7QIQA75I36DUPPJUCM735CSVTFQHNTIPGFBRM42YWWAJU53IBGTMCBX5TUVJDGS5CWSS3RXD2YQ; shshshsID=50bb9d1cc99e76bac5444016311ec585_14_1662652174060; __jdb=122270672.23.170040647|6.1662649143'
}
# Crawl JD.com review pages for every SKU in `zs` and append one CSV row per
# review to 鸡肉评论.csv.
# NOTE(review): the source had lost its indentation; the nesting below (one
# request and one sleep per page, one CSV row per review) is reconstructed
# from the data flow - confirm it matches the intended structure.
zs = ['1904033', '11828234904', '1904056', '3901551', '100007021816', '100005012320', '3936157', '3901547', '2136193', '100021752502', '100036115056', '100016729022', '100009920113', '100012521838', '100013951564', '100007991207']

# `with` closes (and flushes) the CSV even if a request or JSON decode raises
# mid-crawl; the original handle was never closed.
# errors='replace' stops a single character that GBK cannot encode (e.g. an
# emoji in a review) from aborting the run with UnicodeEncodeError.
with open('鸡肉评论.csv', mode='a', encoding='gbk', errors='replace', newline='') as f:
    csv_writer = csv.writer(f)
    # Header row; it is written on every run because the file is opened in append mode.
    csv_writer.writerow(['用户名称', '用户头像', '评分', '评论内容', '评论时间', '点赞数', '评论回复人数', '商品属性', '页面标题', '评论照片链接', '评论视频链接', 'sku', '好评度小数表示', '好评度百分比表示', '全部评价数', '视频晒单数', '追评数', '好评数', '中评数', '差评数', '评价关键词'])
    for i in zs:
        print(f"--------------------正在爬取{i}网址-----------------------")
        for page in range(100):
            print(f"-------------------正在爬取第{page}页----------------------")
            url = f'https://club.jd.com/comment/productPageComments.action?productId={i}&score=0&sortType=5&page={page}&pageSize=10&isShadowSku=0&fold=1'
            response = requests.get(url=url, headers=headers)
            json_data = response.json()
            comment_list = json_data['comments']
            if comment_list:
                # Product-level figures are the same for every review on the
                # page, so read them once per page instead of once per review.
                # Guarded by `if comment_list` so an empty page touches no
                # extra keys, exactly as before.
                productCommentSummary = json_data['productCommentSummary']
                productId = productCommentSummary['productId']  # sku
                goodRate = productCommentSummary['goodRate']  # positive-review rate as a decimal
                goodRateShow = str(productCommentSummary['goodRateShow']) + '%'  # positive-review rate as a percentage
                commentCountStr = productCommentSummary['commentCountStr']  # total number of reviews
                videoCountStr = productCommentSummary['videoCountStr']  # number of video reviews
                afterCountStr = productCommentSummary['afterCountStr']  # number of follow-up reviews
                goodCountStr = productCommentSummary['goodCountStr']  # number of positive reviews
                generalCountStr = productCommentSummary['generalCountStr']  # number of neutral reviews
                poorCountStr = productCommentSummary['poorCountStr']  # number of negative reviews
                # Review keyword tags rendered as "name(count)", space separated.
                keywords = ' '.join(
                    f"{tag['name']}({tag['count']})"
                    for tag in json_data['hotCommentTagStatistics']
                )
            for comment in comment_list:
                nickname = comment['nickname']  # user name
                userImageUrl = 'https://' + str(comment['userImageUrl'])  # user avatar
                score = comment['score']  # rating
                content = re.sub(r'[\t\n]', '', comment['content'])  # review text, tabs/newlines stripped
                creationTime = comment['creationTime']  # review time
                usefulVoteCount = comment['usefulVoteCount']  # number of likes
                replyCount = comment['replyCount']  # number of replies
                productColor = comment['productColor']  # product attribute
                referenceName = comment['referenceName']  # page title

                # Reset per review: previously a review without media either
                # raised NameError (first row) or silently reused the previous
                # review's links.
                imgUrl = ''
                remark = ''
                try:
                    for zp in comment['images']:
                        imgUrl = 'https:' + str(zp['imgUrl'])  # last photo link wins
                except (KeyError, TypeError):
                    pass  # no usable photo list on this review; keep what was collected
                try:
                    for lj in comment['videos']:
                        remark = lj['remark']  # last video's 'remark' field wins
                except (KeyError, TypeError):
                    pass  # no usable video list on this review; keep what was collected

                csv_writer.writerow([
                    nickname, userImageUrl, score, content, creationTime,
                    usefulVoteCount, replyCount, productColor, referenceName,
                    imgUrl, remark, productId, goodRate, goodRateShow,
                    commentCountStr, videoCountStr, afterCountStr, goodCountStr,
                    generalCountStr, poorCountStr, keywords,
                ])
            # Random pause of up to 2 s (two decimals) between page requests.
            time.sleep(round(random.uniform(0, 2), 2))
# 鸭肉评论 (duck reviews)
# https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&productId=100025128936&score=0&sortType=5&page=0&pageSize=10&isShadowSku=0&fold=1
import re
import requests
import csv
import random
import time
import re
from fake_useragent import UserAgent
# Request headers for the JD comment requests below: a random browser
# User-Agent string from fake_useragent, chosen once when this statement runs.
# The commented-out static User-Agent and captured session Cookie that used to
# sit inside this dict were dead code (and leaked credentials), so they were
# removed; the runtime value of `headers` is unchanged.
headers = {
    'User-Agent': UserAgent().random,
}
# Crawl JD.com review pages for every SKU in `zs` and append one CSV row per
# review to 鸭肉评论最最终.csv.
# NOTE(review): the source had lost its indentation; the nesting below (one
# request and one sleep per page, one CSV row per review) is reconstructed
# from the data flow - confirm it matches the intended structure.
zs = ['10030440439616', '10030440439615', '10024510614096', '10032791578778', '10033508902560', '10035680919445', '10035680919444', '100013702803', '100023960924', '100023960908', '100023960904', '100025691332']

# `with` closes (and flushes) the CSV even if a request or JSON decode raises
# mid-crawl; the original handle was never closed.
# errors='replace' stops a single character that GBK cannot encode (e.g. an
# emoji in a review) from aborting the run with UnicodeEncodeError.
with open('鸭肉评论最最终.csv', mode='a', encoding='gbk', errors='replace', newline='') as f:
    csv_writer = csv.writer(f)
    # Header row; it is written on every run because the file is opened in append mode.
    csv_writer.writerow(['用户名称', '用户头像', '评分', '评论内容', '评论时间', '点赞数', '评论回复人数', '商品属性', '页面标题', '评论照片链接', '评论视频链接', 'sku', '好评度小数表示', '好评度百分比表示', '全部评价数', '视频晒单数', '追评数', '好评数', '中评数', '差评数', '评价关键词'])
    for i in zs:
        print(f"--------------------正在爬取{i}网址-----------------------")
        for page in range(100):
            print(f"-------------------正在爬取第{page}页----------------------")
            url = f'https://club.jd.com/comment/productPageComments.action?productId={i}&score=0&sortType=5&page={page}&pageSize=10&isShadowSku=0&fold=1'
            response = requests.get(url=url, headers=headers)
            json_data = response.json()
            comment_list = json_data['comments']
            if comment_list:
                # Product-level figures are the same for every review on the
                # page, so read them once per page instead of once per review.
                # Guarded by `if comment_list` so an empty page touches no
                # extra keys, exactly as before.
                productCommentSummary = json_data['productCommentSummary']
                productId = productCommentSummary['productId']  # sku
                goodRate = productCommentSummary['goodRate']  # positive-review rate as a decimal
                goodRateShow = str(productCommentSummary['goodRateShow']) + '%'  # positive-review rate as a percentage
                commentCountStr = productCommentSummary['commentCountStr']  # total number of reviews
                videoCountStr = productCommentSummary['videoCountStr']  # number of video reviews
                afterCountStr = productCommentSummary['afterCountStr']  # number of follow-up reviews
                goodCountStr = productCommentSummary['goodCountStr']  # number of positive reviews
                generalCountStr = productCommentSummary['generalCountStr']  # number of neutral reviews
                poorCountStr = productCommentSummary['poorCountStr']  # number of negative reviews
                # Review keyword tags rendered as "name(count)", space separated.
                keywords = ' '.join(
                    f"{tag['name']}({tag['count']})"
                    for tag in json_data['hotCommentTagStatistics']
                )
            for comment in comment_list:
                nickname = comment['nickname']  # user name
                userImageUrl = 'https://' + str(comment['userImageUrl'])  # user avatar
                score = comment['score']  # rating
                content = re.sub(r'[\t\n]', '', comment['content'])  # review text, tabs/newlines stripped
                creationTime = comment['creationTime']  # review time
                usefulVoteCount = comment['usefulVoteCount']  # number of likes
                replyCount = comment['replyCount']  # number of replies
                productColor = comment['productColor']  # product attribute
                referenceName = comment['referenceName']  # page title

                # Reset per review: previously a review without media either
                # raised NameError (first row) or silently reused the previous
                # review's links.
                imgUrl = ''
                remark = ''
                try:
                    for zp in comment['images']:
                        imgUrl = 'https:' + str(zp['imgUrl'])  # last photo link wins
                except (KeyError, TypeError):
                    pass  # no usable photo list on this review; keep what was collected
                try:
                    for lj in comment['videos']:
                        remark = lj['remark']  # last video's 'remark' field wins
                except (KeyError, TypeError):
                    pass  # no usable video list on this review; keep what was collected

                csv_writer.writerow([
                    nickname, userImageUrl, score, content, creationTime,
                    usefulVoteCount, replyCount, productColor, referenceName,
                    imgUrl, remark, productId, goodRate, goodRateShow,
                    commentCountStr, videoCountStr, afterCountStr, goodCountStr,
                    generalCountStr, poorCountStr, keywords,
                ])
            # Random pause of up to 2 s (two decimals) between page requests.
            time.sleep(round(random.uniform(0, 2), 2))
# 好评获取 (fetch positive reviews)
# https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&productId=100025128936&score=0&sortType=5&page=0&pageSize=10&isShadowSku=0&fold=1
import re
import requests
import csv
import random
import time
import re
from fake_useragent import UserAgent
# Request headers for the JD comment requests below: a random browser
# User-Agent string from fake_useragent, chosen once when this statement runs.
# The commented-out static User-Agent and captured session Cookie that used to
# sit inside this dict were dead code (and leaked credentials), so they were
# removed; the runtime value of `headers` is unchanged.
headers = {
    'User-Agent': UserAgent().random,
}
# Crawl JD.com review pages (requested with score=3) for every SKU in `zs` and
# append one CSV row per review to 好评评论.csv.
# NOTE(review): score=3 presumably selects positive reviews, going by the
# output file name - confirm against the endpoint's behavior.
# NOTE(review): the source had lost its indentation; the nesting below (one
# request and one sleep per page, one CSV row per review) is reconstructed
# from the data flow - confirm it matches the intended structure.
zs = ['10030440439616', '10030440439615', '10024510614096', '10032791578778', '10033508902560', '10035680919445', '10035680919444', '100013702803', '100023960924', '100023960908', '100023960904', '100025691332']

# `with` closes (and flushes) the CSV even if a request or JSON decode raises
# mid-crawl; the original handle was never closed.
# errors='replace' stops a single character that GBK cannot encode (e.g. an
# emoji in a review) from aborting the run with UnicodeEncodeError.
with open('好评评论.csv', mode='a', encoding='gbk', errors='replace', newline='') as f:
    csv_writer = csv.writer(f)
    # Header row; it is written on every run because the file is opened in append mode.
    csv_writer.writerow(['用户名称', '用户头像', '评分', '评论内容', '评论时间', '点赞数', '评论回复人数', '商品属性', '页面标题', '评论照片链接', '评论视频链接', 'sku', '好评度小数表示', '好评度百分比表示', '全部评价数', '视频晒单数', '追评数', '好评数', '中评数', '差评数', '评价关键词'])
    for i in zs:
        print(f"--------------------正在爬取{i}网址-----------------------")
        for page in range(100):
            print(f"-------------------正在爬取第{page}页----------------------")
            url = f'https://club.jd.com/comment/productPageComments.action?productId={i}&score=3&sortType=5&page={page}&pageSize=10&isShadowSku=0&fold=1'
            response = requests.get(url=url, headers=headers)
            json_data = response.json()
            comment_list = json_data['comments']
            if comment_list:
                # Product-level figures are the same for every review on the
                # page, so read them once per page instead of once per review.
                # Guarded by `if comment_list` so an empty page touches no
                # extra keys, exactly as before.
                productCommentSummary = json_data['productCommentSummary']
                productId = productCommentSummary['productId']  # sku
                goodRate = productCommentSummary['goodRate']  # positive-review rate as a decimal
                goodRateShow = str(productCommentSummary['goodRateShow']) + '%'  # positive-review rate as a percentage
                commentCountStr = productCommentSummary['commentCountStr']  # total number of reviews
                videoCountStr = productCommentSummary['videoCountStr']  # number of video reviews
                afterCountStr = productCommentSummary['afterCountStr']  # number of follow-up reviews
                goodCountStr = productCommentSummary['goodCountStr']  # number of positive reviews
                generalCountStr = productCommentSummary['generalCountStr']  # number of neutral reviews
                poorCountStr = productCommentSummary['poorCountStr']  # number of negative reviews
                # Review keyword tags rendered as "name(count)", space separated.
                keywords = ' '.join(
                    f"{tag['name']}({tag['count']})"
                    for tag in json_data['hotCommentTagStatistics']
                )
            for comment in comment_list:
                nickname = comment['nickname']  # user name
                userImageUrl = 'https://' + str(comment['userImageUrl'])  # user avatar
                score = comment['score']  # rating
                content = re.sub(r'[\t\n]', '', comment['content'])  # review text, tabs/newlines stripped
                creationTime = comment['creationTime']  # review time
                usefulVoteCount = comment['usefulVoteCount']  # number of likes
                replyCount = comment['replyCount']  # number of replies
                productColor = comment['productColor']  # product attribute
                referenceName = comment['referenceName']  # page title

                # Reset per review: previously a review without media either
                # raised NameError (first row) or silently reused the previous
                # review's links.
                imgUrl = ''
                remark = ''
                try:
                    for zp in comment['images']:
                        imgUrl = 'https:' + str(zp['imgUrl'])  # last photo link wins
                except (KeyError, TypeError):
                    pass  # no usable photo list on this review; keep what was collected
                try:
                    for lj in comment['videos']:
                        remark = lj['remark']  # last video's 'remark' field wins
                except (KeyError, TypeError):
                    pass  # no usable video list on this review; keep what was collected

                csv_writer.writerow([
                    nickname, userImageUrl, score, content, creationTime,
                    usefulVoteCount, replyCount, productColor, referenceName,
                    imgUrl, remark, productId, goodRate, goodRateShow,
                    commentCountStr, videoCountStr, afterCountStr, goodCountStr,
                    generalCountStr, poorCountStr, keywords,
                ])
            # Random pause of up to 5 s (two decimals) between page requests.
            time.sleep(round(random.uniform(0, 5), 2))
# 中评获取 (fetch neutral reviews)
# https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&productId=100025128936&score=0&sortType=5&page=0&pageSize=10&isShadowSku=0&fold=1
import re
import requests
import csv
import random
import time
import re
from fake_useragent import UserAgent
# HTTP headers sent with every request. Only a User-Agent is set (no Cookie);
# UserAgent().random is evaluated once here, so the same randomly chosen
# User-Agent string is reused for all requests made with this dict.
headers = {
    'User-Agent': UserAgent().random,
}

# Output CSV, opened in append mode so repeated runs accumulate rows.
# errors='replace' keeps a single character that GBK cannot encode (review
# text may contain emoji, for instance) from aborting the run with
# UnicodeEncodeError; such characters are written as '?' instead.
f = open('中评评论.csv', mode='a', encoding='gbk', errors='replace', newline='')
csv_writer = csv.writer(f)

# In append mode the stream is positioned at end-of-file, so position 0 means
# the file is new or empty. Writing the header only then avoids a duplicate
# header row in the middle of the data on every re-run.
if f.tell() == 0:
    csv_writer.writerow(['用户名称','用户头像','评分','评论内容','评论时间','点赞数','评论回复人数','商品属性','页面标题','评论照片链接','评论视频链接','sku','好评度小数表示','好评度百分比表示','全部评价数','视频晒单数','追评数','好评数','中评数','差评数','评价关键词'])

# Product ids substituted into the `productId=` query parameter of the
# comment endpoint, one crawl per id.
zs = [
    '10030440439616', '10030440439615', '10024510614096', '10032791578778',
    '10033508902560', '10035680919445', '10035680919444', '100013702803',
    '100023960924', '100023960908', '100023960904', '100025691332',
]
# Crawl the score=2 review pages of every product id in `zs` and append one
# CSV row per review through `csv_writer`. Up to 100 pages of 10 reviews are
# requested per product. The output file `f` is closed when the crawl ends,
# whether it finishes normally or is interrupted by an exception, so rows
# already written are flushed to disk.
try:
    for i in zs:
        print(f"--------------------正在爬取{i}网址-----------------------")
        for page in range(0,100,1):
            print(f"-------------------正在爬取第{page}页----------------------")
            url=f'https://club.jd.com/comment/productPageComments.action?productId={i}&score=2&sortType=5&page={page}&pageSize=10&isShadowSku=0&fold=1'
            # A timeout keeps one stalled connection from hanging the whole run.
            response = requests.get(url=url, headers=headers, timeout=10)
            json_data = response.json()
            comment_list = json_data['comments']
            if not comment_list:
                # An empty page means there is nothing further to read for
                # this product; skip the remaining page numbers instead of
                # issuing requests that cannot produce rows.
                break

            # Product-level figures come from the page payload, not from the
            # individual review, so they are read once per page.
            productCommentSummary = json_data['productCommentSummary']
            productId = productCommentSummary['productId']  # sku
            goodRate = productCommentSummary['goodRate']  # positive rate, decimal form
            goodRateShow = str(productCommentSummary['goodRateShow']) + '%'  # positive rate, percent form
            commentCountStr = productCommentSummary['commentCountStr']  # total review count
            videoCountStr = productCommentSummary['videoCountStr']  # video review count
            afterCountStr = productCommentSummary['afterCountStr']  # follow-up review count
            goodCountStr = productCommentSummary['goodCountStr']  # positive review count
            generalCountStr = productCommentSummary['generalCountStr']  # neutral review count
            poorCountStr = productCommentSummary['poorCountStr']  # negative review count

            # Keyword tags rendered as "name(count)" and separated by spaces.
            gjc = [
                f"{tag['name']}({tag['count']})"
                for tag in json_data.get('hotCommentTagStatistics') or []
            ]
            keywords = ' '.join(gjc)

            for comment in comment_list:
                nickname = comment['nickname']  # user name
                userImageUrl = 'https://' + str(comment['userImageUrl'])  # user avatar
                score = comment['score']  # rating
                content = comment['content']  # review text
                # Tabs and newlines are stripped from the review text.
                content = re.sub(r'[\t\n]','',content)
                creationTime = comment['creationTime']  # review time
                usefulVoteCount = comment['usefulVoteCount']  # number of "useful" votes
                replyCount = comment['replyCount']  # number of replies
                productColor = comment['productColor']  # product attribute
                productSize = comment['referenceName']  # page title

                # Reset per review: a review without images/videos must yield
                # empty cells rather than the previous review's links (or a
                # NameError when the very first review has no media).
                imgUrl = ''
                remark = ''
                try:
                    # When a review has several entries the last one is kept.
                    for zp in comment.get('images') or []:
                        imgUrl = 'https:' + str(zp['imgUrl'])  # review photo link
                    for lj in comment.get('videos') or []:
                        remark = lj['remark']  # written to the video-link column
                except (KeyError, TypeError):
                    # Media extraction is best-effort: a malformed entry
                    # leaves whatever was collected so far.
                    pass

                csv_writer.writerow(
                    [nickname, userImageUrl, score, content, creationTime, usefulVoteCount, replyCount, productColor, productSize,
                     imgUrl, remark, productId, goodRate, goodRateShow, commentCountStr, videoCountStr, afterCountStr, goodCountStr,
                     generalCountStr, poorCountStr, keywords]
                )

            # Random pause of 0-5 seconds (two decimals) between page requests.
            time.sleep(round(random.uniform(0, 5), 2))
finally:
    f.close()
# 差评获取 -- negative-review scraper (score=1), writes 差评评论.csv
# https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&productId=100025128936&score=0&sortType=5&page=0&pageSize=10&isShadowSku=0&fold=1
import re
import requests
import csv
import random
import time
import re
from fake_useragent import UserAgent
# HTTP headers sent with every request. Only a User-Agent is set (no Cookie);
# UserAgent().random is evaluated once here, so the same randomly chosen
# User-Agent string is reused for all requests made with this dict.
headers = {
    'User-Agent': UserAgent().random,
}

# Output CSV, opened in append mode so repeated runs accumulate rows.
# errors='replace' keeps a single character that GBK cannot encode (review
# text may contain emoji, for instance) from aborting the run with
# UnicodeEncodeError; such characters are written as '?' instead.
f = open('差评评论.csv', mode='a', encoding='gbk', errors='replace', newline='')
csv_writer = csv.writer(f)

# In append mode the stream is positioned at end-of-file, so position 0 means
# the file is new or empty. Writing the header only then avoids a duplicate
# header row in the middle of the data on every re-run.
if f.tell() == 0:
    csv_writer.writerow(['用户名称','用户头像','评分','评论内容','评论时间','点赞数','评论回复人数','商品属性','页面标题','评论照片链接','评论视频链接','sku','好评度小数表示','好评度百分比表示','全部评价数','视频晒单数','追评数','好评数','中评数','差评数','评价关键词'])

# Product ids substituted into the `productId=` query parameter of the
# comment endpoint, one crawl per id.
zs = [
    '10030440439616', '10030440439615', '10024510614096', '10032791578778',
    '10033508902560', '10035680919445', '10035680919444', '100013702803',
    '100023960924', '100023960908', '100023960904', '100025691332',
]
# Crawl the score=1 review pages of every product id in `zs` and append one
# CSV row per review through `csv_writer`. Up to 100 pages of 10 reviews are
# requested per product. The output file `f` is closed when the crawl ends,
# whether it finishes normally or is interrupted by an exception, so rows
# already written are flushed to disk.
try:
    for i in zs:
        print(f"--------------------正在爬取{i}网址-----------------------")
        for page in range(0,100,1):
            print(f"-------------------正在爬取第{page}页----------------------")
            url=f'https://club.jd.com/comment/productPageComments.action?productId={i}&score=1&sortType=5&page={page}&pageSize=10&isShadowSku=0&fold=1'
            # A timeout keeps one stalled connection from hanging the whole run.
            response = requests.get(url=url, headers=headers, timeout=10)
            json_data = response.json()
            comment_list = json_data['comments']
            if not comment_list:
                # An empty page means there is nothing further to read for
                # this product; skip the remaining page numbers instead of
                # issuing requests that cannot produce rows.
                break

            # Product-level figures come from the page payload, not from the
            # individual review, so they are read once per page.
            productCommentSummary = json_data['productCommentSummary']
            productId = productCommentSummary['productId']  # sku
            goodRate = productCommentSummary['goodRate']  # positive rate, decimal form
            goodRateShow = str(productCommentSummary['goodRateShow']) + '%'  # positive rate, percent form
            commentCountStr = productCommentSummary['commentCountStr']  # total review count
            videoCountStr = productCommentSummary['videoCountStr']  # video review count
            afterCountStr = productCommentSummary['afterCountStr']  # follow-up review count
            goodCountStr = productCommentSummary['goodCountStr']  # positive review count
            generalCountStr = productCommentSummary['generalCountStr']  # neutral review count
            poorCountStr = productCommentSummary['poorCountStr']  # negative review count

            # Keyword tags rendered as "name(count)" and separated by spaces.
            gjc = [
                f"{tag['name']}({tag['count']})"
                for tag in json_data.get('hotCommentTagStatistics') or []
            ]
            keywords = ' '.join(gjc)

            for comment in comment_list:
                nickname = comment['nickname']  # user name
                userImageUrl = 'https://' + str(comment['userImageUrl'])  # user avatar
                score = comment['score']  # rating
                content = comment['content']  # review text
                # Tabs and newlines are stripped from the review text.
                content = re.sub(r'[\t\n]','',content)
                creationTime = comment['creationTime']  # review time
                usefulVoteCount = comment['usefulVoteCount']  # number of "useful" votes
                replyCount = comment['replyCount']  # number of replies
                productColor = comment['productColor']  # product attribute
                productSize = comment['referenceName']  # page title

                # Reset per review: a review without images/videos must yield
                # empty cells rather than the previous review's links (or a
                # NameError when the very first review has no media).
                imgUrl = ''
                remark = ''
                try:
                    # When a review has several entries the last one is kept.
                    for zp in comment.get('images') or []:
                        imgUrl = 'https:' + str(zp['imgUrl'])  # review photo link
                    for lj in comment.get('videos') or []:
                        remark = lj['remark']  # written to the video-link column
                except (KeyError, TypeError):
                    # Media extraction is best-effort: a malformed entry
                    # leaves whatever was collected so far.
                    pass

                csv_writer.writerow(
                    [nickname, userImageUrl, score, content, creationTime, usefulVoteCount, replyCount, productColor, productSize,
                     imgUrl, remark, productId, goodRate, goodRateShow, commentCountStr, videoCountStr, afterCountStr, goodCountStr,
                     generalCountStr, poorCountStr, keywords]
                )

            # Random pause of 0-5 seconds (two decimals) between page requests.
            time.sleep(round(random.uniform(0, 5), 2))
finally:
    f.close()