爬取微博客户端用户评论

爬取微博客户端用户评论

'''
# -*- coding:utf-8 -*-
@FileName: weibo_comment.py
@Date    : 2018/12/5 13:01
@Author  : 风夏
@Hobby   : mi mi 猫
'''

import requests
import json
import time
import emoji       #pip install emoji
import schedule    #pip install schedule
import re

def task():
    """Fetch hot comments of one Weibo post and append them to comment.txt.

    Requests up to two pages of the m.weibo.cn ``hotflow`` comment feed for a
    hard-coded post id. For every comment the author's screen name, the
    creation time and the comment text (with emoji removed) are printed and
    appended as one line to ``comment.txt`` (UTF-8).

    The function takes no arguments and returns ``None`` so it can be
    registered directly as a ``schedule`` job. Network, HTTP and JSON
    errors are reported on stdout and end the current run instead of
    raising, so one failed fetch does not kill the scheduler loop.
    """
    # One clock read, so date and time cannot straddle a second boundary.
    print("当前时间::" + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

    # NOTE(review): the cookie is a hard-coded login session; it will expire
    # and should really come from configuration or the environment.
    # It is sent as a raw ``Cookie`` header: passing the whole string as
    # ``cookies={'cookie': ...}`` makes requests treat it as ONE cookie named
    # "cookie", which mangles the first real cookie in the string.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0',
        'Cookie': '_T_WM=85071206373; WEIBOCN_FROM=1110006030; XSRF-TOKEN=9bf1a0; SUB=_2A25yz27UDeRhGeNN71MX8i3JzDuIHXVuMHKcrDV6PUJbktCOLUndkW1NSaEirof-g7dxz5Gao5BdGJb0jv5aSO6i; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WFxSSvsZRv-x2F6AKv8nL435NHD95Qfe0BpSoz0SKMNWs4Dqcj.i--fi-z7iKysi--NiKyhi-8Fi--ci-2Ei-2Ni--ci-2piK.7; SSOLoginState=1607147140; MLOGIN=1',
    }
    base_url = 'https://m.weibo.cn/comments/hotflow?id=4578675531450036&mid=4578675531450036'
    # emoji.demojize() turns each emoji into a ":name:" code; this pattern
    # then removes those codes. It also removes any other ":xxx:" run that
    # happens to appear in the comment text.
    emoji_code = re.compile(r":\S+?:")

    max_id = None  # paging cursor returned by the previous page
    # Only two pages are fetched per run (the original author notes that the
    # mobile site currently serves at most 16 pages).
    for i in range(1, 3):
        print('====================第{0}页===================='.format(i))
        if max_id is None:
            url = base_url + '&max_id_type=0'
        else:
            url = base_url + '&max_id={0}&max_id_type=0'.format(max_id)

        try:
            # A timeout keeps a stalled connection from blocking the
            # scheduler forever.
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            payload = json.loads(response.text)
        except (requests.RequestException, ValueError) as err:
            print('Request for page {0} failed: {1}'.format(i, err))
            return

        text = payload.get('data') if isinstance(payload, dict) else None
        if not isinstance(text, dict):
            # Presumably an expired cookie or rate limiting - the response
            # carries no comment data; verify against the actual payload.
            print('Page {0} returned no comment data: {1!r}'.format(i, payload))
            return

        max_id = text.get('max_id')
        comments = text.get('data') or []

        # Open the output file once per page rather than once per comment.
        with open("comment.txt", "a", encoding='utf-8') as f:
            for j in comments:
                comment = emoji.demojize(j['text'])        # comment body
                comment = emoji_code.sub(" ", comment)     # drop emoji codes
                name = j['user']['screen_name']            # commenter nickname
                created_at = j['created_at']               # comment timestamp
                print(name, created_at, comment)
                f.write(name + " " + created_at + " " + comment + "\n")

        if not max_id:
            # NOTE(review): a missing/zero max_id looks like the feed's
            # "no more pages" marker - confirm. Requesting max_id=0 would
            # not advance the cursor, so stop here.
            break
        time.sleep(2)  # throttle between pages to avoid anti-crawler blocking

# Register task() to run every 20 seconds. For a 30-minute interval use
# schedule.every(30).minutes.do(task) instead.
schedule.every(20).seconds.do(task)

# Poll the scheduler forever. run_pending() returns immediately when nothing
# is due, so sleep between polls; without it the loop spins a CPU core at 100%.
while True:
    schedule.run_pending()
    time.sleep(1)


  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值