爬取腾讯视频评论——以《三生三世,十里桃花》为例

#@kaiyiching
import requests
import re
import json
import io
import sys
import datetime
import time,random
import csv
import openpyxl 

# Make stdout emit UTF-8 so the Chinese comment text can be printed even on
# consoles whose default encoding is not UTF-8.
if hasattr(sys.stdout, 'reconfigure'):
    # Python 3.7+: switch the encoding in place instead of stacking a second
    # text wrapper on top of the same underlying byte buffer.
    sys.stdout.reconfigure(encoding='utf-8')
elif hasattr(sys.stdout, 'buffer'):
    # Older interpreters: fall back to re-wrapping the raw byte stream.
    # Streams without a ``buffer`` attribute (some IDE consoles) are left
    # untouched rather than crashing the script with AttributeError.
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

# Video id of every episode, in episode order (index 0 is episode 1).
# NOTE(review): entries 55 and 56 are the same id ('1789058581'); this looks
# like a copy-paste slip -- confirm the real id of episode 56.
vids = [
    '1743283224', '1743283291', '1744194068', '1744194109',
    '1745133738', '1745133746', '1746125178', '1746125211',
    '1747460419', '1747460409', '1748886128', '1750117919',
    '1750117958', '1751794201', '1751794196', '1753505550',
    '1753505443', '1755270031', '1755269984', '1757037177',
    '1757037063', '1758825127', '1758825036', '1760156472',
    '1761387060', '1761387007', '1763172513', '1763172469',
    '1764949073', '1764949060', '1766727104', '1766727112',
    '1768551535', '1768551548', '1770382580', '1770382609',
    '1771753979', '1773023416', '1773023418', '1774863174',
    '1774863184', '1776738550', '1776738647', '1778383113',
    '1778383305', '1780599951', '1780599966', '1782554392',
    '1782554400', '1783960585', '1785270823', '1785270863',
    '1787131235', '1787131345', '1789058581', '1789058581',
    '1791013007', '1791013125',
]

# Starting comment cursor for every episode, aligned index-by-index with
# ``vids``. Each cursor is wrapped in its own one-element list because the
# crawl loop appends the follow-up cursors of later pages to that list.
# NOTE(review): entries 55 and 56 are the same cursor, mirroring the duplicate
# id in ``vids`` -- confirm the real value for episode 56.
start_commids = [
    [first_cursor]
    for first_cursor in (
        '6616644405906130203', '6616664859394932771',
        '6616672316945937236', '6615962119770492170',
        '6616638773667540360', '6616672659104676103',
        '6616611398839191565', '6616638804599519528',
        '6616641285764871460', '6615805231621550508',
        '6616632515422000843', '6614549204063199940',
        '6616653318569078954', '6616508559151056021',
        '6616438746806460645', '6614892737450189021',
        '6616541726632325456', '6616491007411730598',
        '6616482905998308867', '6616650046032966419',
        '6616380103377493114', '6616514002842793872',
        '6616666724605826041', '6615947305712546108',
        '6616673826644673892', '6616668964007379730',
        '6616562399103268939', '6614501476259102517',
        '6615580397840873278', '6616560843789979554',
        '6615501928859976367', '6616595094387017719',
        '6615857216740328480', '6616563955453735997',
        '6614396586108066634', '6615562549724787697',
        '6615797262117749567', '6616611457506818250',
        '6616678069939487169', '6616600135378079490',
        '6616655145532062117', '6616559226108496403',
        '6616670371548997639', '6615186649276466275',
        '6616649222480168935', '6616663931755886339',
        '6616536528585866717', '6616645511367176703',
        '6616681357352563095', '6616675089028151111',
        '6616631206095599091', '6616282701895279731',
        '6616680859552956805', '6616345117194691322',
        '6616324008295513548', '6616324008295513548',
        '6616670658119611261', '6616600879377658275',
    )
]

# Rows collected for the spreadsheet:
# [episode number, comment date as YYYY-MM-DD (UTC), comment text].
infos = []

# Only the first few episodes are crawled, to keep the load on the server low.
EPISODES_TO_CRAWL = 5
# Number of comment pages fetched per episode. Each request asks for 10
# comments (``orinum=10``), so this is a page count, not a comment count.
PAGES_PER_EPISODE = 10
# Seconds to wait for the comment server before giving up on a request.
REQUEST_TIMEOUT = 10

# Comment endpoint. The episode id appears both in the path and in the JSONP
# callback name; the original script hard-coded the first episode's id in the
# path, so every episode ended up querying episode 1's comment thread.
COMMENT_URL = (
    'https://video.coral.qq.com/varticle/{vid}/comment/v2'
    '?callback=_varticle{vid}commentv2&orinum=10&oriorder=t&pageflag=1'
    '&cursor={cursor}&scorecursor=&orirepnum=2&reporder=o&reppageflag=1'
    '&source=132&_=1577498740192'
)

# Matches ``callbackName( ...payload... )`` with an optional trailing ``;``.
_JSONP_RE = re.compile(r'^\s*[\w.$]+\s*\((.*)\)\s*;?\s*$', re.DOTALL)


def _strip_jsonp(text):
    """Return the JSON payload inside a JSONP ``callback(...)`` wrapper.

    Only the outer wrapper is removed, so parentheses that occur inside the
    comment text itself are preserved. Text that is not wrapped is returned
    unchanged.
    """
    match = _JSONP_RE.match(text)
    return match.group(1) if match else text


def _fetch_comment_page(vid, cursor):
    """Download one page of comments and return the response's ``data`` dict.

    Raises ``requests.RequestException`` on network/HTTP errors (including a
    timeout) and ``ValueError``/``KeyError`` if the body is not the expected
    JSON document.
    """
    response = requests.get(
        COMMENT_URL.format(vid=vid, cursor=cursor),
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    payload = _strip_jsonp(response.content.decode('utf-8'))
    return json.loads(payload)['data']


episodes = zip(vids[:EPISODES_TO_CRAWL], start_commids[:EPISODES_TO_CRAWL])
for episode, (vid, commids) in enumerate(episodes, start=1):
    # Page through the thread with a local cursor instead of appending to
    # (and thereby mutating) the shared ``start_commids`` table.
    cursor = commids[0]

    for _ in range(PAGES_PER_EPISODE):
        data = _fetch_comment_page(vid, cursor)

        for comment in data['oriCommList']:
            # ``time`` is a Unix timestamp; format it as a calendar date.
            # The local name must not be ``time``, which would shadow the
            # imported module used for the pause below.
            posted_at = datetime.datetime.fromtimestamp(
                int(comment['time']), datetime.timezone.utc
            )
            infos.append(
                [episode, posted_at.strftime("%Y-%m-%d"), comment['content']]
            )

        # ``last`` is the cursor to pass in order to get the following page.
        next_cursor = data['last']
        print(next_cursor)
        if not next_cursor or next_cursor == cursor:
            # No further page available; stop instead of refetching.
            break
        cursor = next_cursor

        # Short random pause between requests to go easy on the server.
        time.sleep(random.uniform(0.5, 1.5))


# Write the scraped rows to an Excel workbook.

# Control characters that cannot be stored in an XLSX cell. openpyxl raises
# IllegalCharacterError when a cell value contains one, which would abort the
# export after the whole crawl has finished, so they are removed up front.
_XLSX_ILLEGAL_CHARS = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f]')

wb = openpyxl.Workbook()
sheet = wb.active
sheet.title = 'tencent-comment'
# Row 1 holds the show title; the data rows are appended below it.
sheet['A1'] = '三生三世,十里桃花'
for info in infos:
    sheet.append([
        _XLSX_ILLEGAL_CHARS.sub('', cell) if isinstance(cell, str) else cell
        for cell in info
    ])
# Save the workbook into the current working directory.
wb.save('Tencent-comment.xlsx')

至于怎样批量获取视频的ID和评论的ID,我之后再想想办法。

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值