Python爬取弹幕
在bilibili得到需要爬取的视频的弹幕地址;
https://api.bilibili.com/x/v1/dm/list.so?oid=201056987 #得到bilibili弹幕地址
http://api.bilibili.com/x/player/pagelist?bvid=BV1PK4y1b7dt&jsonp=jsonp #得到视频的cid(即上面弹幕地址中的oid)
https://www.bilibili.com/video/BV1PK4y1b7dt?t=1 #此为视频的地址——其bvid字符串放入上面即可找到cid
用get_cid得到视频的cid;
用get_data得到弹幕数据;
用save_to_file将弹幕存为txt文件;
import html
import json
import re
from pprint import pprint

import chardet
import requests
from bs4 import BeautifulSoup
def get_cid(bvid="BV1PK4y1b7dt"):
    """Look up the cid of a Bilibili video from its bvid.

    Queries the player ``pagelist`` endpoint and returns the ``cid`` field of
    the first entry in the reply's ``data`` list (presumably the video's
    first part -- confirm against the API if multi-part videos matter).

    Args:
        bvid: The BV identifier taken from the video URL. Defaults to the
            video this script was originally hard-coded for, so a bare
            ``get_cid()`` call behaves as before.

    Returns:
        The ``cid`` value exactly as decoded from the JSON reply.

    Raises:
        requests.RequestException: On a network failure, a timeout, or a
            non-2xx HTTP status.
        ValueError: If the response body is not valid JSON.
        KeyError, IndexError, TypeError: If the reply does not contain
            ``data[0]["cid"]``.
    """
    response = requests.get(
        "https://api.bilibili.com/x/player/pagelist",
        params={"bvid": bvid, "jsonp": "jsonp"},
        timeout=10,  # requests blocks indefinitely when no timeout is given
    )
    # Surface HTTP errors here instead of feeding an error page to the
    # JSON decoder and failing with a misleading message.
    response.raise_for_status()
    payload = json.loads(response.text)
    return payload["data"][0]["cid"]
def get_data(cid):
    """Download the danmaku list for ``cid`` and return the comment texts.

    Args:
        cid: Identifier sent as the ``oid`` query parameter of the
            ``dm/list.so`` endpoint; it is rendered with ``str()``.

    Returns:
        A list of strings, one per ``<d>`` element found in the reply, in
        document order, with character entities such as ``&amp;`` decoded.
        The list is empty when the reply contains no ``<d>`` elements.

    Raises:
        requests.RequestException: On a network failure, a timeout, or a
            non-2xx HTTP status.
    """
    response = requests.get(
        "https://api.bilibili.com/x/v1/dm/list.so",
        params={"oid": str(cid)},
        timeout=10,  # requests blocks indefinitely when no timeout is given
    )
    # Do not scrape an HTTP error page as if it were the danmaku document.
    response.raise_for_status()

    raw = response.content
    try:
        # Text that decodes cleanly as strict UTF-8 is almost certainly
        # UTF-8, so the slow and fallible statistical guess is skipped.
        xml_text = raw.decode("utf-8")
    except UnicodeDecodeError:
        # Not UTF-8: fall back to chardet's guess.
        response.encoding = chardet.detect(raw)["encoding"]
        xml_text = response.text

    # ``\b`` restricts the match to tags named exactly "d" (not any tag that
    # merely starts with "d"); DOTALL lets a comment body span several lines.
    pattern = re.compile(r"<d\b[^>]*>(.*?)</d>", re.DOTALL)
    # The captured text is still XML-escaped; decode entities so the caller
    # gets the characters the viewer actually typed.
    return [html.unescape(text) for text in pattern.findall(xml_text)]
def save_to_file(data):
with open("dan_mu.txt",'w',encoding="utf-8"