爬取微博评论数据

XSJ.寒冰之雪

于 2024-04-18 09:57:35 发布

阅读量1.5k

点赞数 47

文章标签： python beautifulsoup 开发语言

本文链接：https://blog.csdn.net/fasdaegaw/article/details/137907909

版权

# -*- coding: utf-8 -*-
import requests #用于发送请求并且拿到源代码
from bs4 import BeautifulSoup #用于解析数据
'''
1.找到数据源地址并且分析链接
2.发送请求并且拿到数据
3.在拿到的数据中解析出需要的数据
4.存储数据
'''

# Request headers that make the HTTP request look like it comes from a desktop
# Firefox browser; they are sent with every search request below.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
    # "br" is intentionally NOT advertised: requests only decodes Brotli when
    # an optional brotli/brotlicffi package is installed. Without it the body
    # would stay compressed, response.text would be unreadable, and the parser
    # would silently find no posts.
    "Accept-Encoding": "gzip, deflate",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "cross-site",
}
# Cookies sent with every search request.
# NOTE(review): these look like values captured from a logged-in browser
# session; presumably they expire and must be refreshed, and should not be
# committed to a shared repository -- confirm.
cookies = dict(
    SUB="_2A25LJAI1DeRhGeNG6VQS8yHXVoWBv9rDV8PUNbmtAbLUfTkW9NS3qSAB5yafaDOCMSBsSAAzS_ZARZ1aBI",
    SUBP="0033WrSXqPxfM72_qlqsxnLSjV6mMYf5JpX5KzhUgL.Fo-Reoq0e0BpeKM2dJLoIEUq-XQLxK-LB-qL1KzLxK-L1hqLBo5LxKBLBo.L12zLxK.L1-zLB-2t",
    SINAGLOBAL="675.476.1705404058457",
    ULV="1713402436:2:1:4927205619754.432.1713402429966:1712029972225",
    UOR=",,www.baidu.com",
    _s_tentry="weibo.com",
    Apache="49272056197.1713402429966",
    ALF="02_1714469",
    PC_TOKEN="614a985b",
)
# Crawl Weibo search results: for every keyword, fetch result pages 1-3,
# extract the text of each post body and append it (one post per line) to
# x.txt, which is truncated at start-up.
li = ['美团', '饿了么', '淘宝']  # search keywords
search_url = 'https://s.weibo.com/weibo'

# `with` guarantees the output file is flushed and closed even when a request
# or the parser raises part-way through the crawl.
with open('x.txt', 'w', encoding='utf-8') as f:
    for word in li:
        for ix in range(1, 4):
            try:
                # The site checks that requests look like they come from a
                # browser, hence the browser-like headers and cookies.
                # `params` lets requests URL-encode the keyword, and the
                # timeout keeps a stalled connection from hanging the script.
                response = requests.get(
                    search_url,
                    params={'q': word, 'page': ix},
                    headers=headers,
                    cookies=cookies,
                    timeout=10,
                )
                # Treat HTTP error statuses as failures instead of parsing an
                # error page as if it were search results.
                response.raise_for_status()
            except requests.RequestException as exc:
                # Best effort: report the failed page and keep crawling.
                print(f'request failed for q={word!r} page={ix}: {exc}')
                continue

            # Parse the page source and select every element whose
            # node-type attribute marks it as a post body.
            html = BeautifulSoup(response.text, 'lxml')
            bs = html.find_all(attrs={'node-type': 'feed_list_content'})

            # One post per line.
            f.writelines(i.text + '\n' for i in bs)