# Crawl the danmaku (bullet comments) of the "HouLang" (后浪) bilibili video and build a word cloud.
# NOTE: most danmaku appear to have been deleted — only three remain; the logic is what matters here.
# The word cloud library was unfamiliar and cost a lot of time, but the end result works.
# Important: danmaku are served separately from the video. In the browser's Network tab, find the
# request marked with a small circle icon, open that URL directly to see all comments, then crawl it.
import requests,csv
import wordcloud
from bs4 import BeautifulSoup
def parse_page(url):
    """Fetch a bilibili danmaku XML list, append the comments to a CSV file,
    and render a word cloud of all comments collected so far.

    Args:
        url: Danmaku list endpoint, e.g.
             https://api.bilibili.com/x/v1/dm/list.so?oid=...

    Side effects:
        - HTTP GET to *url*.
        - Appends one CSV row (one cell per comment) to 'b站弹幕.csv'.
        - Writes the word cloud image to 'HouLang.png'.

    Raises:
        requests.HTTPError: if the server responds with an error status.
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"}
    response = requests.get(url, headers=headers)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    text = response.content.decode('utf-8')
    soup = BeautifulSoup(text, "lxml")
    # Each <d> element in the danmaku XML holds one comment. Searching the
    # whole soup (instead of soup.find("i") first) avoids an AttributeError
    # when the <i> root is absent; skip empty <d> tags whose .string is None.
    comments = [d.string for d in soup.find_all('d') if d.string]
    # Append this crawl's comments as one CSV row ('a' mode accumulates runs).
    with open('b站弹幕.csv', 'a', newline='', encoding='utf-8-sig') as fp:
        csv.writer(fp).writerow(comments)
    # Read back *all* accumulated rows. 'utf-8-sig' matches the write encoding
    # so the BOM does not leak into the first cell as '\ufeff'.
    with open('b站弹幕.csv', 'r', encoding='utf-8-sig') as f:
        rows = [''.join(row) for row in csv.reader(f)]
    # Join every row — the original kept only the last row for the cloud.
    all_text = ' '.join(rows)
    print(all_text)
    if not all_text.strip():
        # WordCloud.generate() raises ValueError on empty text; nothing to do.
        return
    w = wordcloud.WordCloud(font_path='./fonts/simhei.ttf')
    w.generate(all_text)
    w.to_file('HouLang.png')
def main():
    """Entry point: crawl the danmaku list for the HouLang video (oid=187036562)."""
    danmaku_url = "https://api.bilibili.com/x/v1/dm/list.so?oid=187036562"
    parse_page(danmaku_url)


if __name__ == '__main__':
    main()