话不多说直接上代码
测试用例:
搜索以 java 为关键词、时长在 60 分钟以上的视频。
import requests
import json
import os
# HTTP headers passed to requests.get() by get_json(); only the user-agent
# is set, to a desktop Chrome string.
headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4302.0 Safari/537.36"
}
def _extract_result_json(page_text):
    """Cut the search-result JSON array out of the raw page text.

    The text is narrowed in three steps: first to the span that starts at
    ``window.__INITIAL_STATE__=``, then to the part between the first
    ``result`` and the following ``,"noMore"``, and finally to everything
    from the first ``[`` of that part onwards.

    Raises:
        ValueError: if any marker other than the closing script marker is
            missing, instead of silently slicing with a ``-1`` index.
    """
    state_start = page_text.find('window.__INITIAL_STATE__=')
    if state_start == -1:
        raise ValueError("marker 'window.__INITIAL_STATE__=' not found in page")

    state_end = page_text.find(
        """(function(){var s;(s=document.currentScript""", state_start)
    if state_end == -1:
        # The closing marker only bounds the search; without it, scan to the
        # end of the page rather than dropping the last character.
        state_end = len(page_text)
    state_text = page_text[state_start:state_end]

    result_start = state_text.find('result')
    if result_start == -1:
        raise ValueError("'result' not found in the page state")
    result_end = state_text.find(',"noMore"', result_start)
    if result_end == -1:
        raise ValueError("',\"noMore\"' not found after 'result' in the page state")
    result_text = state_text[result_start:result_end]

    array_start = result_text.find('[')
    if array_start == -1:
        raise ValueError("no '[' found after 'result' in the page state")
    return result_text[array_start:]


def get_json(url, timeout=10):
    """Download ``url`` and return the embedded search-result array as text.

    Args:
        url: address of the search page to fetch.
        timeout: seconds to wait for the server before giving up; without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The substring of the page starting at the ``[`` that follows
        ``result`` and ending just before ``,"noMore"``. It is returned as
        a ``str``; the caller decides whether to parse it.

    Raises:
        ValueError: if the expected markers are not present in the page.
    """
    page_text = requests.get(url, headers=headers, timeout=timeout).text
    return _extract_result_json(page_text)
def json_txt(dic_json):
    """Print every key/value pair of every record as ``key : value``.

    Args:
        dic_json: an iterable of mappings (for example the list produced by
            ``json.loads`` on the search-result array). Any iterable works,
            not only indexable sequences.
    """
    for record in dic_json:
        for key, value in record.items():
            print("%s : %s" % (key, value))
if __name__ == "__main__":
    from urllib.parse import quote

    # Search keyword.
    keyword = "java"
    # Duration filter: 1 = under 10 min, 2 = 10-30 min, 3 = 30-60 min,
    # 4 = over 60 min.
    duration = 4
    # Sort order: click = most clicks, pubdate = newest, dm = most danmaku
    # (bullet comments), stow = most favourited.
    order = "pubdate"
    # Category filter. The value is appended to "tids_" in the URL, giving
    # "tids_1=0": 1 means the first-level category, 0 means all.
    tids_ = "1=0"
    page = 1
    # The keyword is percent-encoded so that spaces, '&' or '#' in it cannot
    # break the query string.
    url = "https://search.bilibili.com/all?keyword={}&order={}&duration={}&tids_{}&page={}".format(
        quote(keyword, safe=""), order, duration, tids_, page)
    # Fetch the result array as text.
    result = get_json(url)
    # Parse the text into Python objects.
    json_value = json.loads(result)
    # Print every field of every record.
    json_txt(json_value)
    # "pause" is a Windows shell built-in that keeps the console window open;
    # on other systems it does not exist, so it is skipped there.
    if os.name == "nt":
        os.system("pause")
将结果输出到文件的版本:
import requests
import time
# HTTP headers passed to requests.get() by get_json(); only the user-agent
# is set, to a desktop Chrome string.
headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4302.0 Safari/537.36"
}
def _extract_result_json(page_text):
    """Cut the search-result JSON array out of the raw page text.

    The text is narrowed in three steps: first to the span that starts at
    ``window.__INITIAL_STATE__=``, then to the part between the first
    ``result`` and the following ``,"noMore"``, and finally to everything
    from the first ``[`` of that part onwards.

    Raises:
        ValueError: if any marker other than the closing script marker is
            missing, instead of silently slicing with a ``-1`` index.
    """
    state_start = page_text.find('window.__INITIAL_STATE__=')
    if state_start == -1:
        raise ValueError("marker 'window.__INITIAL_STATE__=' not found in page")

    state_end = page_text.find(
        """(function(){var s;(s=document.currentScript""", state_start)
    if state_end == -1:
        # The closing marker only bounds the search; without it, scan to the
        # end of the page rather than dropping the last character.
        state_end = len(page_text)
    state_text = page_text[state_start:state_end]

    result_start = state_text.find('result')
    if result_start == -1:
        raise ValueError("'result' not found in the page state")
    result_end = state_text.find(',"noMore"', result_start)
    if result_end == -1:
        raise ValueError("',\"noMore\"' not found after 'result' in the page state")
    result_text = state_text[result_start:result_end]

    array_start = result_text.find('[')
    if array_start == -1:
        raise ValueError("no '[' found after 'result' in the page state")
    return result_text[array_start:]


def get_json(url, timeout=10):
    """Download ``url`` and return the embedded search-result array as text.

    Args:
        url: address of the search page to fetch.
        timeout: seconds to wait for the server before giving up; without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The substring of the page starting at the ``[`` that follows
        ``result`` and ending just before ``,"noMore"``. It is returned as
        a ``str``; the caller decides whether to parse it.

    Raises:
        ValueError: if the expected markers are not present in the page.
    """
    page_text = requests.get(url, headers=headers, timeout=timeout).text
    return _extract_result_json(page_text)
def json_write(keyword, result):
    """Append ``result`` to the dump file for ``keyword`` and today's date.

    The file is named ``bilibili-search-<keyword>-<YYYY-MM-DD>.json`` (local
    date), lives in the current working directory, and is created when it
    does not exist yet. Text is appended as UTF-8; nothing is returned.
    """
    today = time.strftime("%Y-%m-%d", time.localtime())
    target_name = "bilibili-search-{}-{}.json".format(keyword, today)
    with open(target_name, 'a+', encoding='utf-8') as out:
        out.write(result)
if __name__ == "__main__":
    from urllib.parse import quote

    # Search keyword, typed by the user.
    keyword = input("请输入搜索关键词: ")
    # Duration filter: 1 = under 10 min, 2 = 10-30 min, 3 = 30-60 min,
    # 4 = over 60 min.
    duration = input("时长 1: 10分钟以下 2: 10-30分钟 3: 30-60分钟 4: 60分钟以上 : ")
    # Sort order: click = most clicks, pubdate = newest, dm = most danmaku
    # (bullet comments), stow = most favourited.
    order_list = ['click', 'pubdate', 'dm', 'stow']
    order_in = int(input("排序规则: 1 .最多点击 2.最新发布 3.最多弹幕 4.最多收藏 : "))
    # Out-of-range choices fall back to the first option. The test must use
    # "or": a number cannot be both below 1 and above 4.
    if order_in < 1 or order_in > 4:
        order_in = 1
    order = order_list[order_in - 1]
    # Category filter. The value is appended to "tids_" in the URL, giving
    # "tids_1=0": 1 means the first-level category, 0 means all.
    tids_ = "1=0"
    # One past the last page number, so range() below covers 1..N inclusive.
    page_totol = int(input("需要获取多少页 的数据 : ")) + 1
    last_page = page_totol - 1
    # True once at least one record has been written, so that a separating
    # comma is emitted before every later non-empty page.
    wrote_records = False
    for page in range(1, page_totol):
        # The keyword is percent-encoded so that spaces, '&' or '#' in it
        # cannot break the query string.
        url = "https://search.bilibili.com/all?keyword={}&order={}&duration={}&tids_{}&page={}".format(
            quote(keyword, safe=""), order, duration, tids_, page)
        result_json = get_json(url)
        # Drop the page's own surrounding brackets so that all pages can be
        # stitched into a single array.
        # NOTE(review): assumes get_json() returns text of the form "[...]".
        records = result_json[1:-1]
        result = "[" if page == 1 else ""
        if records.strip():
            if wrote_records:
                result += ","
            result += records
            wrote_records = True
        if page == last_page:
            result += "]"
        print(result)
        json_write(keyword, result)