今天下午自闭
终于没有什么困难地写出了一个自己觉得还行的爬虫
import requests
import re
# Browser-like User-Agent string sent with every request made by this script.
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'

# Shared HTTP headers passed to requests.get() by the functions below.
heders = {
    'User-Agent': _USER_AGENT,
}
def url(number):
    """Build the hot-list API URLs for the first ``number`` pages.

    Args:
        number: How many page URLs to generate; ``page_num`` runs from
            0 to ``number - 1``.

    Returns:
        A list of ``number`` URL strings in page order (empty when
        ``number`` is 0 or negative).
    """
    # Named ``template`` rather than ``url`` so the local does not shadow
    # this function's own name.
    template = 'https://api.vc.bilibili.com/link_draw/v2/Doc/list?category=all&type=hot&page_num={}&page_size=20'
    return [template.format(page) for page in range(number)]
def getone(num):
    """Collect image URLs from the first ``num`` hot-list pages.

    Each page URL produced by ``url(num)`` is fetched, and every
    ``"img_src"`` value found in the response text is gathered.

    Args:
        num: Number of list pages to request.

    Returns:
        A flat list of the matched ``img_src`` strings, in page order.
    """
    # The pattern is applied to the raw response text (not parsed JSON);
    # compiled once here because it does not change between pages.
    img_src_pattern = re.compile(r'"img_src":"(.*?)"', re.S)
    picture_url = []
    for page_url in url(num):
        # A timeout keeps one stalled connection from hanging the whole crawl.
        res = requests.get(page_url, headers=heders, timeout=10).text
        picture_url.extend(img_src_pattern.findall(res))
    return picture_url
def download(num):
    """Download every image found on the first ``num`` hot-list pages.

    Files are saved into the fixed output directory as ``0.jpg``,
    ``1.jpg``, ... following the order of the URLs returned by
    ``getone(num)``.

    Args:
        num: Number of list pages to crawl for image URLs.
    """
    import os  # local import: only this function needs it

    save_dir = 'E:\\for_bilibili_picture'
    # open() does not create missing directories, so make sure it exists
    # before the first file is written.
    os.makedirs(save_dir, exist_ok=True)
    for t, picture in enumerate(getone(num)):
        # A timeout keeps one stalled download from blocking the rest.
        www = requests.get(picture, headers=heders, timeout=10)
        with open('E:\\for_bilibili_picture\\{}.jpg'.format(t), 'wb') as f:
            f.write(www.content)
# Script entry: crawl 20 list pages (page_num 0-19) and save every image found.
download(num=20)
有很多东西还是不会,功能也很简单
(谁能想到是因为bilibili模拟登录不会,才发现Ajax也可以的)
接下来也要准备探索反反爬虫和昨天说的多线程任务了
就像这样!