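# Before running, open the browser's developer tools (F12) and copy the
# following request-header values into the `headers` dicts below:
#   "User-Agent": "浏览器的User-Agent",
#   "Cookie": "浏览器的Cookie",
#   "Referer": "浏览器的Referer"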
# Import the HTTP request module
import requests
# Import the regular-expression module
import re
import json
from bs4 import BeautifulSoup
def ask(url, timeout=30):
    """Fetch ``url`` with browser-like headers and return the parsed JSON.

    Args:
        url: Address to send the GET request to.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled connection cannot hang the script
            indefinitely.

    Returns:
        The response body decoded with ``json.loads``.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # Impersonate a browser; replace the placeholder values with the ones
    # shown in the browser's developer tools (F12).
    headers = {
        "User-Agent": "浏览器的User-Agent",
        "Cookie": "浏览器的Cookie",
        "Referer": "浏览器的Referer",
    }
    # Send the request
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Text body returned by the server
    html = response.text
    # Convert the JSON string into Python objects
    json_data = json.loads(html)
    return json_data
def ask1(url, timeout=30):
    """Fetch ``url`` with browser-like headers and return the body as text.

    Args:
        url: Address to send the GET request to.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled connection cannot hang the script
            indefinitely.

    Returns:
        The response body as a string (``response.text``).

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    # Impersonate a browser; replace the placeholder values with the ones
    # shown in the browser's developer tools (F12).
    headers = {
        "User-Agent": "浏览器的User-Agent",
        "Cookie": "浏览器的Cookie",
        "Referer": "浏览器的Referer",
    }
    # Send the request
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Text body returned by the server
    html = response.text
    return html
# Request URL: replace the placeholder with the uploader's video-list link
url ="想要爬取的b站up视频主页链接"
json_data = ask(url)
# Not used in the visible part of the script; kept in case later code reads it
j = 0
# Base address the BV id is appended to (loop-invariant, so built once)
bl_url = "https://www.bilibili.com/"
# Process at most the first 30 entries. Slicing instead of indexing with
# range(30) avoids an IndexError when fewer than 30 videos are returned.
for video in json_data['data']['list']['vlist'][:30]:
    # Video title
    title = video['title']
    # Video BV id
    bv = video['bvid']
    print(title)
    print(bv)
    v_url = bl_url + bv
    # Fetch the video page through ask1(), which supplies the browser-like
    # headers; `headers` is local to ask()/ask1() and does not exist at
    # module level, so referencing it here raised NameError.
    content = ask1(v_url)
    soup = BeautifulSoup(content, "html.parser")
    text = soup.find_all(
        "meta", attrs={"data-vue-meta": "true", "itemprop": "description"})
    text_str = str(text)
    # Split the stringified tag list on commas to get a list of fragments
    text_list = text_str.split(',')
    # Keep only the fragments that mention the play count ("视频播放量")
    play_count_info = [item for item in text_list if '视频播放量' in item]
    print(play_count_info)