改代码(自己写不出来)
网址:成都大熊猫基地 https://m.ctrip.com/webapp/ticket/commentlist?spotid=4229&spotname=%E6%88%90%E9%83%BD%E5%A4%A7%E7%86%8A%E7%8C%AB%E7%B9%81%E8%82%B2%E7%A0%94%E7%A9%B6%E5%9F%BA%E5%9C%B0&catid=0
headers 只需要加三个字段(referer、cookie、user-agent)就可以。
使用搜索“高峰”找到图片中的这个网址 viewCommentList(https://sec-m.ctrip.com/restapi/soa2/12530/json/viewCommentList?_fxpcqlniredt=09031037312071779645)(存放评论的网址,这个比较重要,不好找),把 request headers 和 payload 复制过去,再通过 payload 中的 pagenum 参数翻页。
获得了response后,从json转为字典,然后一层一层取数据就可以了(使用json在线格式化工具方便查看)。
(需要两层循环:第一层负责翻页,第二层负责遍历每页中的 25 条评论。这段代码不是自己从头写的,是在别人的代码基础上改的)。
import json
import time
import requests
import os.path as osp
# Comment-list API endpoint; the script POSTs one request per page to it.
postUrl = 'https://sec-m.ctrip.com/restapi/soa2/12530/json/viewCommentList?_fxpcqlniredt=09031037312071779645'
# Output file "comment.txt", located in the same directory as this script.
commentFile = osp.join(osp.dirname(__file__), './comment.txt')
def get_payload(page):
    """Build the request body and HTTP headers for one page of comments.

    Args:
        page: Page number to request; it is sent as the ``pagenum`` field
            of the body.

    Returns:
        A ``(payloadData, headersParameters)`` tuple of two newly built
        dicts: the JSON-serialisable request body and the HTTP headers.
    """
    payloadData = {
        # Same id as the ``spotid=4229`` query parameter in the referer URL.
        "viewid": 4229,
        "pageid": 10650009792,
        "tagid": 0,
        # Number of comments requested per page.
        "pagesize": 25,
        "videoimgsize": "C_348_236",
        "pagenum": page,
        "contentType": "json",
        "head": {
            # Same value as the ``GUID`` entry in the cookie below.
            "cid": "09031037312071779645",
            "ctok": "",
            "cver": "1.0",
            "lang": "01",
            "sid": "8888",
            "syscode": "09",
            "auth": "",
            # "protocal" (sic) is sent exactly as captured; do not "fix" it.
            "extension": [{"name": "protocal", "value": "https"}],
        },
        "ver": "7.14.2",
    }
    # NOTE(review): the cookie is a hard-coded value copied from a browser
    # request; it presumably expires, so refresh it if the API starts
    # rejecting requests.
    headersParameters = {
        "referer": "https://m.ctrip.com/webapp/ticket/commentlist?spotid=4229&spotname=%E6%88%90%E9%83%BD%E5%A4%A7%E7%86%8A%E7%8C%AB%E7%B9%81%E8%82%B2%E7%A0%94%E7%A9%B6%E5%9F%BA%E5%9C%B0&catid=0",
        "cookie": "_abtest_userid=31e3832f-2293-4bd3-8dc2-359ec90190bf; Session=SmartLinkCode=U155952&SmartLinkKeyWord=&SmartLinkQuary=&SmartLinkHost=&SmartLinkLanguage=zh; Union=OUID=index&AllianceID=4897&SID=155952&SourceID=&createtime=1578024326&Expires=1578629125662; _ga=GA1.2.136497755.1578024326; _gid=GA1.2.405723273.1578024326; MKT_CKID=1578024325755.cf17f.dp1v; MKT_CKID_LMT=1578024325756; _RSG=O1q7Jf.xji9SRS.zaxUEf8; _RDG=28f46d5a3fa7da2a7d1a329b0e241e08fb; _RGUID=9b2defea-531f-4058-8a4e-7bb93df9c6dc; manualclose=1; gad_city=1c34bf42c800b070526271a020335e54; GUID=09031037312071779645; _bfi=p1%3D10650000804%26p2%3D10650000804%26v1%3D7%26v2%3D5; _jzqco=%7C%7C%7C%7C1578024406621%7C1.359967697.1578024325750.1578024547410.1578024638819.1578024547410.1578024638819.undefined.0.0.4.4; __zpspc=9.1.1578024325.1578024638.4%232%7Csp0.baidu.com%7C%7C%7C%25E6%2590%25BA%25E7%25A8%258B%7C%23; MKT_Pagesource=H5; _bfa=1.1578024322684.2jsjut.1.1578024322684.1578033589168.2.11.10650009792; _RF1=117.131.219.56",
        "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Mobile Safari/537.36",
    }
    return payloadData, headersParameters
if __name__ == '__main__':
    # Download the comment pages one after another and write the text of
    # every comment to commentFile, one comment per line.
    max_pages = 100      # hard upper bound on the number of pages fetched
    delay_seconds = 5    # pause between consecutive page requests

    # ``with`` guarantees the file is closed even on Ctrl-C or an
    # unexpected error.
    with open(commentFile, 'w', encoding='utf8') as fw:
        for page_num in range(1, max_pages + 1):
            payloadData, headersParameters = get_payload(page_num)
            try:
                res = requests.post(postUrl, data=json.dumps(payloadData),
                                    headers=headersParameters, timeout=5)
                # Surface HTTP-level failures instead of trying to parse an
                # error page as a comment list.
                res.raise_for_status()
                # No ``encoding=`` argument: it is forwarded to json.loads,
                # which rejects that keyword on Python 3.9+.
                comment_list = res.json()["data"]["comments"]
                lines = [comment['content'] + '\n' for comment in comment_list]
            except (requests.RequestException, ValueError,
                    KeyError, TypeError) as e:
                # Network failure, non-JSON body, or an unexpected response
                # shape: report it and stop, keeping what was written so far.
                print(str(e))
                break

            # An empty page means there are no more comments.
            if not lines:
                break

            fw.writelines(lines)
            print('page.{0}'.format(page_num))

            # Throttle between requests; no need to wait after the last page.
            if page_num < max_pages:
                time.sleep(delay_seconds)