微博(wb)评论获取(一级评论和二级评论)

参考链接:

python爬虫爬取微博评论--完整版(超详细,大学生不骗大学生)_微博爬虫-CSDN博客

完整代码:

#待解决comments中的评论会重复获取
import requests
import csv
import time
import random

# Output file. Opened in append mode so rows from repeated runs accumulate in
# the same CSV; 'utf-8-sig' is the BOM-prefixed UTF-8 codec.
f = open('评论814.csv', mode='a', encoding='utf-8-sig', newline='')
csv_write = csv.writer(f)
# Only emit the header row when the file is still empty. Writing it
# unconditionally in append mode put a stray header line into the data on
# every run after the first.
if f.tell() == 0:
    csv_write.writerow(['text_raw', 'created_at'])

# Request headers sent with every API call. The values are blank here; the
# original note points at the browser's Network panel entry for the comments
# request as the place to copy them from.
headers = {
    'cookie' : '',
    'referer': '',
    'user-agent':''
}

# Paginated fetch of second-level comments (replies)
def get_next1(id, max_id=0):
    """Fetch successive pages of replies for one comment and record them.

    Every item's ``text_raw`` and ``created_at`` is printed and written as a
    row through the module-level ``csv_write``. Pages are requested until the
    response carries an empty ``data`` list or a ``max_id`` of 0.

    Args:
        id: Value placed in the ``id`` query parameter; the caller in this
            file passes a first-level comment's ``id``. (The name shadows the
            builtin but is kept so existing callers keep working.)
        max_id: Paging cursor used for the first request (default 0).

    Returns:
        None.

    Raises:
        KeyError: If a response lacks ``data`` / ``max_id`` or an item lacks
            ``text_raw`` / ``created_at``.
    """
    # Iterate instead of recursing once per page: a long reply thread would
    # otherwise grow the call stack by one frame per page and could hit
    # Python's recursion limit.
    while True:
        # NOTE: uid is hard-coded in the query string.
        url = f'https://weibo.com/ajax/statuses/buildComments?flow=1&is_reload=1&id={id}&is_show_bulletin=2&is_mix=1&fetch_level=1&max_id={max_id}&count=20&uid=3261134763&locale=zh-CN'
        response = requests.get(url=url, headers=headers)
        json_data = response.json()  # decode the JSON body into Python objects
        data_list = json_data['data']
        if not data_list:  # no more replies: stop paging
            return

        for data in data_list:
            text_raw = data['text_raw']
            created_at = data['created_at']

            print(text_raw, created_at)
            # Write the second-level comment row
            csv_write.writerow([text_raw, created_at])

        # The cursor for the next page comes from the response; 0 ends paging.
        max_id = json_data['max_id']
        if max_id == 0:
            return
        # Random pause of up to 10 seconds between page requests
        time.sleep(random.random() * 10)



# Fetch second-level comments (replies) for one first-level comment
def get_second_comments(id, total_number, comments, max_id=None):
    """Record the replies under one first-level comment.

    The replies are fetched page by page through ``get_next1``. The inline
    ``comments`` preview that accompanies the parent comment is deliberately
    not written: the file's own TODO reports that the entries in ``comments``
    end up fetched twice, because ``get_next1`` retrieves them again.

    Args:
        id: ``id`` of the first-level comment, forwarded to ``get_next1``.
            (The name shadows the builtin but is kept for compatibility.)
        total_number: The parent's ``total_number`` field; when it is 0 no
            request is made.
        comments: Inline preview of replies from the parent comment. Accepted
            for interface compatibility; no longer written to the CSV.
        max_id: Unused; kept for interface compatibility.

    Returns:
        None.
    """
    if total_number == 0:
        return
    # The paginated fetch is the single source of reply rows, so each reply
    # is written once.
    get_next1(id)


# Fetch first-level comments
def get_next0(max_id=None):
    """Fetch successive pages of first-level comments and record them.

    Every item's ``text_raw`` and ``created_at`` is printed and written as a
    row through the module-level ``csv_write``; then ``get_second_comments``
    is called with the item's ``id``, ``total_number`` and ``comments``.
    Pages are requested until the response carries an empty ``data`` list or
    a ``max_id`` of 0.

    Args:
        max_id: Paging cursor for the first request. When falsy, no
            ``max_id`` parameter is added to the URL.

    Returns:
        None.

    Raises:
        KeyError: If a response lacks ``data`` / ``max_id`` or an item lacks
            one of the fields read here.
    """
    # Iterate instead of recursing once per page: at 10 comments per page a
    # busy post needs enough pages for the recursive form to hit Python's
    # recursion limit.
    while True:
        # NOTE: the post id and uid are hard-coded in the query string.
        url = 'https://weibo.com/ajax/statuses/buildComments?is_reload=1&id=5065928205146230&is_show_bulletin=2&is_mix=0&count=10&uid=3261134763&fetch_level=0&locale=zh-CN'
        if max_id:
            url += f'&max_id={max_id}'
        response = requests.get(url=url, headers=headers)
        json_data = response.json()  # decode the JSON body into Python objects
        data_list = json_data['data']
        if not data_list:  # no more comments: stop paging
            return

        for data in data_list:
            text_raw = data['text_raw']
            created_at = data['created_at']

            print(text_raw, created_at)
            # Write the first-level comment row
            csv_write.writerow([text_raw, created_at])
            # Hand the comment's id, reply count and inline replies to the
            # second-level handler
            get_second_comments(data['id'], data['total_number'], data['comments'])

        # The cursor for the next page comes from the response; 0 ends paging.
        max_id = json_data['max_id']
        if max_id == 0:
            return
        # Random pause of up to 10 seconds between page requests
        time.sleep(random.random() * 10)

# Run the scrape only when executed as a script, not when imported.
if __name__ == '__main__':
    try:
        get_next0()
    finally:
        # Close the CSV file so buffered rows are flushed even when the
        # scrape stops on an error.
        f.close()





  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值