from urllib import request, error
import re
import json
# Fetch a page through a proxy server.
def use_proxy(url, proxy_addr):
    """Fetch ``url`` through the proxy at ``proxy_addr`` and return its body.

    As a side effect the proxy-aware opener is installed globally, so later
    ``request.urlopen`` calls in this process go through it as well.

    Args:
        url: Address to request.
        proxy_addr: Proxy server as ``host:port``.

    Returns:
        The response body decoded as UTF-8; bytes that cannot be decoded
        are dropped.

    Raises:
        urllib.error.URLError: If the request fails.
    """
    # Proxy addresses can be looked up at e.g. https://www.xicidaili.com/nn/
    # Register the proxy for both schemes: with only 'http' in the mapping,
    # https:// URLs are sent directly and never touch the proxy.
    proxy = request.ProxyHandler({'http': proxy_addr, 'https': proxy_addr})
    opener = request.build_opener(proxy, request.HTTPHandler)
    # Install as the global opener.
    request.install_opener(opener)
    # Close the response as soon as the body has been read.
    with request.urlopen(url) as response:
        return response.read().decode('utf-8', 'ignore')
# Proxy used for every request, as "host:port".
proxy_addr = '163.204.243.242:9999'
# Pagination cursor for the first page; each response supplies the next
# cursor in its "last" field.
cursor = '6573212673981793805'
# Starting value of the `_` query parameter, incremented after each request.
# NOTE(review): looks like a millisecond timestamp used as a cache-buster — confirm.
request_stamp = 1568989058882

# Comment-list endpoint; only the cursor and the `_` value vary per request.
COMMENT_URL_TEMPLATE = (
    'https://video.coral.qq.com/varticle/4100808418/comment/v2'
    '?callback=_varticle4100808418commentv2&orinum=10&oriorder=o&pageflag=1'
    '&cursor={cursor}&scorecursor=0&orirepnum=2&reporder=o&reppageflag=1'
    '&source=132&_={stamp}'
)
# Compiled once instead of on every loop iteration.
CURSOR_PATTERN = re.compile('"last":"(.*?)"')
COMMENT_PATTERN = re.compile('"content":"(.*?)"')

# Fetch up to 10 pages, following the cursor returned by each response.
for _page in range(10):
    url = COMMENT_URL_TEMPLATE.format(cursor=cursor, stamp=request_stamp)
    try:
        ret = use_proxy(url, proxy_addr)
    except error.URLError as exc:
        # A failed request leaves no cursor to continue from, so stop here
        # rather than dying with a traceback.
        print('request failed, stopping:', exc)
        break

    # Print whatever comment texts this page contains.
    comments = COMMENT_PATTERN.findall(ret)
    print(comments)

    next_cursors = CURSOR_PATTERN.findall(ret)
    if not next_cursors:
        # No "last" field in the response: there is no next page to request
        # (indexing [0] here used to raise IndexError).
        break
    cursor = next_cursors[0]
    request_stamp += 1
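# Article title: 抓包分析实战爬取腾讯视频评论 (hands-on packet-capture analysis: scraping Tencent Video comments)
# Page footer: 最新推荐文章于 2024-05-04 11:36:04 发布 (latest recommended article published 2024-05-04 11:36:04)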