提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档
文章目录
一、代码
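下面的脚本用 requests 和正则表达式抓取 B 站“敦煌”关键词搜索结果中每个链接对应的页面,提取标题、关键词、播放量、弹幕量、点赞数、投币数、收藏数、转发数、作者和发布时间,并写入 CSV 文件。不多介绍了,直接看源码吧: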
import csv
import requests,time
import re
# Output file for the scraped rows; it is closed by the f.close() call at the
# end of the script.
# newline="" is required by the csv module: the writer emits its own "\r\n"
# row terminators, and without it text-mode newline translation on Windows
# turns them into "\r\r\n", which shows up as a blank line after every row.
f = open("敦煌数据13.csv", mode="w", encoding="utf-8", newline="")
csvwriter = csv.writer(f)
# Browser-like User-Agent header passed to requests.get() by the scraping loop.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
}

# Query parameters for a video page. The original literal had the colon inside
# the first string and no comma between the entries, so Python concatenated the
# adjacent string literals into a single garbled key; they are now two entries.
# NOTE(review): `data` is not referenced anywhere in the visible script (the
# same two values are hard-coded into the video URL) — confirm whether it was
# meant to be passed as `params=` to requests.get().
data = {
    "spm_id_from": "333.337.search-card.all.click",
    "vd_source": "87b6bd48aed6eabaf100cec8100d68f4",
}
# Paging state for the search loop:
#   pages - exclusive upper bound of range(1, pages) used by the page loop
#   o     - value substituted into the `o` query parameter of the search URL;
#           the loop adds 36 to it as it advances
pages, o = 28, 36

# Scraped fields, all starting out as empty strings. They are overwritten
# whenever the corresponding regular expression matches a fetched page.
(
    titel,
    guanjianci,
    bofang,
    danmu,
    dianzhan,
    toubi,
    shoucang,
    zhuanfa,
    zuozhe,
    time1,
) = ("",) * 10
# r1: the "keywords" <meta> tag. `titel` is the first comma-separated entry of
# its content, `guanjianci` is everything between that entry and the fixed
# trailing site keywords.
r1 = re.compile(
    r'<meta data-vue-meta="true" itemprop="keywords" name="keywords" '
    r'content="(?P<titel>.*?),(?P<guanjianci>.*?),哔哩哔哩,bilibili,B站,弹幕">'
)

# r2: the statistics sentence. One named group per counter plus the author;
# DOTALL lets each group span line breaks inside the sentence.
r2 = re.compile(
    r"视频播放量(?P<bofang>.*?)"
    r"、弹幕量(?P<danmu>.*?)"
    r"、点赞数(?P<dianzhan>.*?)"
    r"、投硬币枚数(?P<toubi>.*?)"
    r"、收藏人数(?P<shoucang>.*?)"
    r"、转发人数(?P<zhuanfa>.*?)"
    r", 视频作者(?P<zuozhe>.*?)"
    r"作者简介",
    re.DOTALL,
)

# r3: content of the "uploadDate" <meta> tag.
r3 = re.compile(
    r'<meta data-vue-meta="true" itemprop="uploadDate" content="(?P<time1>.*?)">'
)

# r4: protocol-relative targets ("//host/path") of links that open in a new tab.
r4 = re.compile(r'<a href="//(?P<html6>.*?)" target="_blank"')
# Main scrape loop: walk the search result pages, follow every link matched by
# r4, pull the fields out of each fetched page with r1/r2/r3 and append one CSV
# row per page.
#
# NOTE(review): the pasted source had lost its indentation. The nesting below
# (row write and sleep once per followed link, `o` advanced once per search
# page, file closed after the loop) is a reconstruction — confirm.
#
# Changes from the original:
#   * removed the stray ")" after {page} in the search URL, which corrupted the
#     `page` query parameter;
#   * fields are reset for every fetched page, so a page where a pattern does
#     not match no longer inherits the previous page's values;
#   * a link that was already followed is skipped, and a page on which nothing
#     matched produces no row;
#   * requests carry a timeout, and the file is closed even if a request raises.
seen_links = set()
try:
    for page in range(1, pages):
        url = (
            "https://search.bilibili.com/all?vt=01409527&keyword=%E6%95%A6%E7%85%8C"
            "&from_source=webtop_search&spm_id_from=333.1007&search_source=3"
            f"&page={page}&o={o}"
        )
        req = requests.get(url, headers=headers, timeout=10)

        for it in r4.finditer(req.text):
            it1 = "https://" + it.group("html6") + "?spm_id_from=333.337.search-card.all.click&vd_source=87b6bd48aed6eabaf100cec8100d68f4"
            if it1 in seen_links:
                continue
            seen_links.add(it1)

            result = requests.get(it1, headers=headers, timeout=10)
            result1 = result.text

            # Start from a clean slate so stale values never leak into this row.
            titel = guanjianci = ""
            bofang = danmu = dianzhan = toubi = shoucang = zhuanfa = zuozhe = ""
            time1 = ""

            # As in the original, the last match of each pattern wins.
            for i in r1.finditer(result1):
                titel = i.group("titel")
                guanjianci = i.group("guanjianci")

            for aa in r2.finditer(result1):
                bofang = aa.group("bofang")
                print(bofang)
                danmu = aa.group("danmu")
                dianzhan = aa.group("dianzhan")
                toubi = aa.group("toubi")
                shoucang = aa.group("shoucang")
                zhuanfa = aa.group("zhuanfa")
                zuozhe = aa.group("zuozhe")

            for aia in r3.finditer(result1):
                time1 = aia.group("time1")

            # Column order is kept exactly as the original wrote it.
            row = [titel, guanjianci, danmu, bofang, dianzhan, toubi, shoucang, zhuanfa, zuozhe, time1]
            if any(row):
                csvwriter.writerow(row)

            # Pause between page fetches, whether or not a row was written.
            time.sleep(1)

        o = o + 36
finally:
    f.close()