一, 需求说明
1.我想要保存喜欢的up公开的相薄的所有图片和下方配文
2. 我想要记录一下每条动态的发布时间
3. 我想要每个动态保存到一个以发布日期命名的文件夹下,方便回忆
4. 我想要每个动态的下方配文保存成一个txt文档,方便我查看和保存
二,技术选型: python
三, 分析与代码实现:
经分析发现:
1.存在一个接口我们可以查看up主的相薄总条数
例如其中一个人的uid为: 9824766
访问接口:
https://api.bilibili.com/x/dynamic/feed/draw/upload_count?uid=9824766&jsonp=jsonp
浏览器得到的响应是一段 JSON,其中 data 对象里的 all_count 字段就是动态总数。用代码解析如下:
# Prompt for the uploader's uid (Bilibili user id).
uid = input("请输入up主的uid,然后按回车键: ")
# Query the total number of album (draw) posts for this uploader.
uid_url = 'https://api.bilibili.com/x/dynamic/feed/draw/upload_count?uid=' + uid + '&jsonp=jsonp'
header = {'user-agent': 'Mozilla/5.0'}
count_request = requests.get(uid_url, headers=header)
# Use the public .json() API instead of the private ._content attribute.
count_res = count_request.json()
count = count_res.get('data').get('all_count')
# all_count is an integer; concatenating it to a str raises TypeError,
# so convert it explicitly before printing.
print("相薄里的动态个数为: " + str(count))
2.存在一个接口,可以根据传的页数参数返回一个带有description,img_src数组 和ctime 发布时间 json响应
接口格式如下:
https://api.bilibili.com/x/dynamic/feed/draw/doc_list?uid={uid}&page_num={page_num}&page_size=30&biz=all&jsonp=jsonp
例如访问某up的第一页相薄:
https://api.bilibili.com/x/dynamic/feed/draw/doc_list?uid=9824766&page_num=0&page_size=30&biz=all&jsonp=jsonp
解析代码:
def getRecords(url):
    """Fetch one page of album posts and return a list of records.

    Each record is a dict with keys:
      'desc'  - the post's caption text
      'src'   - list of image URLs in the post
      'ctime' - publish time formatted as YYYY_MM_DD_HH_MM_SS

    Returns an empty list on a non-200 response. (The original
    implicitly returned None in that case, which crashed the caller's
    ``for record in records`` loop.)
    """
    r = requests.get(url, headers=header)
    if r.status_code != 200:
        return []
    # Use the public .json() API instead of the private ._content attribute.
    content = r.json()
    items = content.get('data').get('items')
    records = []
    for item in items:
        # Convert the Unix timestamp to a filesystem-safe local-time string.
        timestamp = item.get('ctime')
        ctime = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime(timestamp))
        src_list = [picture.get('img_src') for picture in item.get('pictures')]
        records.append({
            'desc': item.get('description'),
            'src': src_list,
            'ctime': ctime,
        })
    return records
3.访问每一页
通过1,2我们可以查看到一个up主相薄里动态的总个数,也知道一页是30个,
那么我们就能知道有多少页,拼接页数参数就能去访问每一页了
# Prompt for the uploader's uid (Bilibili user id).
uid = input("请输入up主的uid,然后按回车键: ")
# Current page index (the doc_list API is zero-based).
page_num = 0
# NOTE(review): `count` is assumed to come from the upload_count request
# shown in step 1 — it is not defined in this snippet itself.
while count > 0:
    url = 'https://api.bilibili.com/x/dynamic/feed/draw/doc_list?uid=' + uid + '&page_num=' + str(page_num) + '&page_size=30&biz=all&jsonp=jsonp'
    # Get this page's description, img_src list and ctime.
    records = getRecords(url)
    print("第 "+str(page_num)+'页 开始在下载')
    # Download this page's images and write its captions.
    download(records)
    page_num += 1
    # Each page holds up to 30 posts.
    count = count - 30
4.图片下载和description写为txt,文件夹名字为发布时间ctime
def download(records):
    """Save each record's images and caption under image/<ctime>/.

    For every record, a directory named after the post's publish time
    is created; each image is saved as <basename>.jpg and the caption
    is written to description.txt inside that directory.
    """
    for record in records:
        ctime = record.get('ctime')
        # os.path.join avoids the hard-coded '\\' separators of the
        # original (Windows-only) and the mixed '\\' + '/' path it
        # built for description.txt.
        folder = os.path.join(os.getcwd(), "image", ctime)
        if not os.path.exists(folder):
            os.makedirs(folder)
        urls = record.get('src')
        description = record.get('desc')
        for url in urls:
            r = requests.get(url)
            # Derive the file name from the URL, dropping the extension.
            file_name = os.path.splitext(url)[0].split('/')[-1]
            with open(os.path.join(folder, file_name + '.jpg'), 'wb') as f:
                f.write(r.content)
        # Write the caption once per record.
        with open(os.path.join(folder, 'description.txt'), encoding='utf-8', mode='w') as f:
            f.write(str(description))
四,全部代码整理
# coding:utf-8
import os
import requests
import json
import time
def download(records):
    """Save each record's images and caption under image/<ctime>/.

    For every record, a directory named after the post's publish time
    is created; each image is saved as <basename>.jpg and the caption
    is written to description.txt inside that directory.
    """
    for record in records:
        ctime = record.get('ctime')
        # os.path.join avoids the hard-coded '\\' separators of the
        # original (Windows-only) and the mixed '\\' + '/' path it
        # built for description.txt.
        folder = os.path.join(os.getcwd(), "image", ctime)
        if not os.path.exists(folder):
            os.makedirs(folder)
        urls = record.get('src')
        description = record.get('desc')
        for url in urls:
            r = requests.get(url)
            # Derive the file name from the URL, dropping the extension.
            file_name = os.path.splitext(url)[0].split('/')[-1]
            with open(os.path.join(folder, file_name + '.jpg'), 'wb') as f:
                f.write(r.content)
        # Write the caption once per record.
        with open(os.path.join(folder, 'description.txt'), encoding='utf-8', mode='w') as f:
            f.write(str(description))
def getRecords(url):
    """Fetch one page of album posts and return a list of records.

    Each record is a dict with keys:
      'desc'  - the post's caption text
      'src'   - list of image URLs in the post
      'ctime' - publish time formatted as YYYY_MM_DD_HH_MM_SS

    Returns an empty list on a non-200 response. (The original
    implicitly returned None in that case, which crashed the caller's
    ``for record in records`` loop.)
    """
    r = requests.get(url, headers=header)
    if r.status_code != 200:
        return []
    # Use the public .json() API instead of the private ._content attribute.
    content = r.json()
    items = content.get('data').get('items')
    records = []
    for item in items:
        # Convert the Unix timestamp to a filesystem-safe local-time string.
        timestamp = item.get('ctime')
        ctime = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime(timestamp))
        src_list = [picture.get('img_src') for picture in item.get('pictures')]
        records.append({
            'desc': item.get('description'),
            'src': src_list,
            'ctime': ctime,
        })
    return records
# Prompt for the uploader's uid (Bilibili user id).
uid = input("请输入up主的uid,然后按回车键: ")
# Query the total number of album (draw) posts for this uploader.
uid_url = 'https://api.bilibili.com/x/dynamic/feed/draw/upload_count?uid=' + uid + '&jsonp=jsonp'
header = {'user-agent': 'Mozilla/5.0'}
count_request = requests.get(uid_url, headers=header)
# Use the public .json() API instead of the private ._content attribute.
count_res = count_request.json()
count = count_res.get('data').get('all_count')
# Page index; the doc_list API is zero-based and serves 30 posts per page.
page_num = 0
while count > 0:
    url = 'https://api.bilibili.com/x/dynamic/feed/draw/doc_list?uid=' + uid + '&page_num=' + str(page_num) + '&page_size=30&biz=all&jsonp=jsonp'
    records = getRecords(url)
    print("第 "+str(page_num)+' 正在下载')
    download(records)
    page_num += 1
    count = count - 30