import pandas as pd
from snownlp import SnowNLP
# Module-level accumulators filled by get_data(): `comment_text` receives the
# scraped comment strings and `tag` receives one 0/1 sentiment label per
# comment. They start out as two separate empty lists.
tag, comment_text = [], []
from selenium import webdriver
from icecream import ic
import time
import csv
# Start a Chrome WebDriver session as soon as the module is executed.
# NOTE(review): get_data() appears to assign its own `driver` from a fresh
# webdriver.Chrome() call, so this module-level session looks unused by the
# code visible here and is never quit -- confirm nothing else relies on it
# before removing.
driver = webdriver.Chrome()
def get_data(music_url):
    """Scrape the comments shown on *music_url* and sentiment-tag them.

    Opens a Chrome session, loads the page and scrolls to the bottom 100
    times (presumably so that more comments get loaded), then collects the
    text of every non-empty ``p.comment__text`` element. The collected
    comments are appended to the module-level ``comment_text`` list, and one
    label per new comment is appended to the module-level ``tag`` list:
    0 when the SnowNLP sentiment score is below 0.5, otherwise 1.

    Only the comments gathered by this call are tagged, so ``comment_text``
    and ``tag`` stay the same length even when the function is called more
    than once.

    Args:
        music_url: Address of the page to load.

    Returns:
        None. Results are accumulated in ``comment_text`` and ``tag``.
    """
    # Imported here so the function is self-contained with respect to the
    # file's existing import block.
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    try:
        driver.get(music_url)
        driver.implicitly_wait(10)
        js_button = 'document.documentElement.scrollTop=10000000'
        for i in range(100):
            driver.execute_script(js_button)
            # Pause between scrolls to give the page time to react.
            time.sleep(1.5)
            print(i)
        print("滚动条已经处于页面最下方!")
        # find_elements_by_css_selector() was removed in Selenium 4.3;
        # find_elements(By..., ...) is available in Selenium 3 and 4 alike.
        elements = driver.find_elements(By.CSS_SELECTOR, 'p.comment__text')
        # Read .text once per element instead of twice.
        texts = (element.text for element in elements)
        new_comments = [text for text in texts if text != ""]
    finally:
        # Always close the browser, even if loading or scraping failed.
        driver.quit()

    comment_text.extend(new_comments)
    for text in new_comments:
        res = SnowNLP(str(text))
        tag.append(0 if res.sentiments < 0.5 else 1)
# Toplist pages that can be scraped, one URL per entry.
url_all = [
    'https://y.qq.com/n/ryqq/toplist/62',
    'https://y.qq.com/n/ryqq/toplist/26',
    'https://y.qq.com/n/ryqq/toplist/27',
    'https://y.qq.com/n/ryqq/toplist/4',
    'https://y.qq.com/n/ryqq/toplist/57',
    'https://y.qq.com/n/ryqq/toplist/5',
    'https://y.qq.com/n/ryqq/toplist/3',
    'https://y.qq.com/n/ryqq/toplist/16',
    'https://y.qq.com/n/ryqq/toplist/17',
    'https://y.qq.com/n/ryqq/toplist/28',
    'https://y.qq.com/n/ryqq/toplist/108',
    'https://y.qq.com/n/ryqq/toplist/129',
    'https://y.qq.com/n/ryqq/toplist/107',
    'https://y.qq.com/n/ryqq/toplist/105',
    'https://y.qq.com/n/ryqq/toplist/58',
]

# Scrape a single page: index 12 is the 13th entry of the list
# (presumably the reason for the "13" in the output file name below).
get_data(url_all[12])

# Pair each collected comment with its sentiment label and write them to CSV.
comment_text_tag = pd.DataFrame({"comment": comment_text, "tag": tag})
comment_text_tag.to_csv("./comment_text_tag13.csv")
