Python + Selenium 爬取QQ音乐评论的爬虫代码实现

  • 爬取QQ音乐评论数据是为了做文本分析,因此使用 SnowNLP 给评论打情感标签;以下代码中打标签的部分已被注释掉,需要时请自行取消注释
  • 需要使用 webdriver;这里使用的是 Chrome 浏览器,驱动的安装请参考相关安装说明
import pandas as pd
from snownlp import SnowNLP

# Optional sentiment labels (one per comment). Uncomment together with the
# tagging code in get_data and the "tag" column in the export step.
# tag = []
# Scraped comment strings: get_data appends to this list, and the export step
# at the bottom of the script writes it to CSV.
comment_text = []
# NOTE(review): leftover debug call; get_all_hotSong is not defined anywhere
# in the visible code.
# print(get_all_hotSong())
from selenium import webdriver
from icecream import ic
import time
import csv
# No module-level browser session is created here: get_data() launches its own
# Chrome driver, so a driver opened at import time would only be an extra
# browser window that nothing in this script uses or closes.
def get_data(music_url, scroll_times=100, scroll_pause=1.5):
    """Scrape comment texts from a QQ Music page.

    Opens ``music_url`` in a fresh Chrome session, scrolls towards the bottom
    of the page ``scroll_times`` times so more comments get loaded, then
    reads the text of every ``p.comment__text`` element. Non-empty texts are
    appended to the module-level ``comment_text`` list and also returned.

    Args:
        music_url: URL of the page to open.
        scroll_times: Number of scroll-to-bottom steps to perform.
        scroll_pause: Seconds to sleep after each scroll step.

    Returns:
        The list of non-empty comment strings collected by this call.
    """
    # Local import keeps the block self-contained; the ``By`` locator
    # constants are available in both Selenium 3 and Selenium 4, whereas
    # ``find_elements_by_css_selector`` no longer exists in Selenium 4.3+.
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    try:
        # Open the page.
        driver.get(music_url)

        # Implicit wait: element lookups poll for up to 10 s instead of
        # failing immediately (this is not a fixed sleep).
        driver.implicitly_wait(10)

        scroll_to_bottom = 'document.documentElement.scrollTop=10000000'

        # Run the scroll script repeatedly, pausing after each step.
        for i in range(scroll_times):
            driver.execute_script(scroll_to_bottom)
            time.sleep(scroll_pause)
            print(i)
        print("滚动条已经处于页面最下方!")

        # Select by tag name + class attribute.
        elements = driver.find_elements(By.CSS_SELECTOR, 'p.comment__text')

        # Read ``.text`` once per element (each read is a WebDriver call)
        # and drop empty strings.
        texts = [text for text in (el.text for el in elements) if text]
    finally:
        # Always close the browser, even if scraping failed part-way.
        driver.quit()

    comment_text.extend(texts)

    # Optional sentiment tagging (disabled). To enable it, also uncomment the
    # module-level ``tag`` list and the "tag" column in the export step.
    # for text in texts:
    #     res = SnowNLP(str(text))
    #     if res.sentiments < 0.5:
    #         tag.append(0)
    #     else:
    #         tag.append(1)

    return texts
# QQ Music toplist (chart) pages available for scraping.
url_all = [
    'https://y.qq.com/n/ryqq/toplist/62',
    'https://y.qq.com/n/ryqq/toplist/26',
    'https://y.qq.com/n/ryqq/toplist/27',
    'https://y.qq.com/n/ryqq/toplist/4',
    'https://y.qq.com/n/ryqq/toplist/57',
    'https://y.qq.com/n/ryqq/toplist/5',
    'https://y.qq.com/n/ryqq/toplist/3',
    'https://y.qq.com/n/ryqq/toplist/16',
    'https://y.qq.com/n/ryqq/toplist/17',
    'https://y.qq.com/n/ryqq/toplist/28',
    'https://y.qq.com/n/ryqq/toplist/108',
    'https://y.qq.com/n/ryqq/toplist/129',
    'https://y.qq.com/n/ryqq/toplist/107',
    'https://y.qq.com/n/ryqq/toplist/105',
    'https://y.qq.com/n/ryqq/toplist/58',
]

# Scrape one chart only: index 12 is the 13th entry of the list (the output
# file name below also carries the number 13).
get_data(url_all[12])

# Export the collected comments as a single-column table.
comment_text_tag = pd.DataFrame({"comment": comment_text})
# comment_text_tag["tag"] = tag
comment_text_tag.to_csv("./comment_text_tag13.csv")

在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值