背景
笔者从2020年开始使用语雀平台梳理知识库,但看了语雀最近的更新,感觉它又要走有道云笔记的老路了,所以赶紧把相关笔记备份出来。
使用语雀客户端一篇一篇导出太费劲了,正巧看到官方开放了API访问接口(参见语雀官方文档《Doc - 文档》,yuque.com),
于是写了个小脚本,通过API批量导出全部文档。
说明
依赖
requests:API请求
pdfkit:转PDF保存
需要修改的内容
- DATA_PATH:文件保存路径
- DATA_TYPE:导出格式,支持 0:Markdown,1:HTML,2:PDF
- USER_TOKEN:用户的Token
  访问 https://www.yuque.com/settings/tokens 新建一个Token,并允许其访问所有资源。
- WKHTMLTOPDF_PATH:wkhtmltopdf 工具的可执行文件路径
  下载地址:https://wkhtmltopdf.org/downloads.html
源码奉上
'''
Description: export YuQue all docs
Author: 是小之禺啊
Date: 2022-11-20
'''
import os
import re
import requests
import pdfkit
# Root folder that receives the exported files.
DATA_PATH = "yuque"
# Export format: 0 = Markdown, 1 = HTML, 2 = HTML rendered to PDF.
DATA_TYPE = 0
# Base URL of the YuQue v2 API.
# NOTE(review): "customspace" looks like a placeholder for your own space
# sub-domain -- confirm before running.
YUQUE_API = "https://customspace.yuque.com/api/v2"
# Value sent as the User-Agent header; any string works, but the API
# requires the header to be present.
USER_AGENT = "agent"
# Personal access token, generated at https://www.yuque.com/settings/tokens.
# The YUQUE_TOKEN environment variable takes precedence so the secret does
# not have to be committed to the source file; the literal is the fallback.
USER_TOKEN = os.environ.get("YUQUE_TOKEN", "***********************************")
# Path of the wkhtmltopdf executable used for HTML -> PDF conversion,
# download at https://wkhtmltopdf.org/downloads.html
WKHTMLTOPDF_PATH = r"D:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe"
class CExportYuQueDocs:
    """Export every document of a YuQue account to Markdown, HTML or PDF.

    Configuration is read from the module-level constants ``YUQUE_API``,
    ``USER_AGENT``, ``USER_TOKEN``, ``DATA_PATH``, ``DATA_TYPE`` and
    ``WKHTMLTOPDF_PATH``. Call :meth:`start` to run the whole export.
    """

    # Seconds to wait for the API before giving up; without a timeout a
    # stalled connection would hang the export forever.
    _REQUEST_TIMEOUT = 30
    # doc_type -> (sub-folder below data_path, file extension).
    _EXPORT_LAYOUT = {0: ("md", ".md"), 1: ("html", ".html"), 2: ("pdf", ".pdf")}
    # Characters that are not allowed in file names (Windows rules, which
    # are the strictest) or that would introduce extra path components.
    _INVALID_NAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')
    # Empty named anchors embedded in the Markdown body. ``[^"]*`` keeps each
    # match inside one tag; a greedy ``.*`` would also swallow the text
    # between two anchors that share a line.
    _ANCHOR_TAG = re.compile(r'<a name="[^"]*"></a>')
    # Stylesheet link injected into exported HTML.
    _HTML_HEAD = ('<!doctype html><head><link rel="stylesheet" '
                  'href="http://editor.yuque.com/ne-editor/lake-content-v1.css"></head>')

    def __init__(self):
        """Copy the module configuration onto the instance.

        Raises:
            ValueError: if ``DATA_TYPE`` is not 0, 1 or 2, or if PDF export
                is selected and pdfkit cannot be configured with
                ``WKHTMLTOPDF_PATH``.
        """
        self.api = YUQUE_API
        self.headers = {
            "User-Agent": USER_AGENT,
            "X-Auth-Token": USER_TOKEN,
        }
        self.data_path = DATA_PATH
        self.doc_type = DATA_TYPE
        # Fail before any network traffic instead of on the first save.
        if self.doc_type not in (0, 1, 2):
            raise ValueError(f"Parameter Error! Unsupported DATA_TYPE [{self.doc_type}]")
        # NOTE(review): 297 x 210 presumably means A4 in millimetres -- confirm
        # against the wkhtmltopdf option documentation.
        self.pdfkit_options = {
            'page-height': '297',
            'page-width': '210',
            'encoding': 'UTF-8',
            'custom-header': [('Accept-Encoding', 'gzip')],
        }
        # wkhtmltopdf is only needed for PDF export; Markdown/HTML export
        # must not fail just because the tool is not installed.
        self.pdfkit_config = None
        if self.doc_type == 2:
            try:
                self.pdfkit_config = pdfkit.configuration(wkhtmltopdf=WKHTMLTOPDF_PATH)
            except Exception as err:
                # Keep the historical exception type but preserve the cause.
                raise ValueError("Parameter Error!") from err

    def _get(self, path):
        """Send an authenticated GET to ``self.api + path`` and return the response."""
        return requests.get(url=self.api + path, headers=self.headers,
                            timeout=self._REQUEST_TIMEOUT)

    @classmethod
    def _safe_name(cls, text):
        """Return ``text`` turned into a single, valid file-name component."""
        cleaned = cls._INVALID_NAME_CHARS.sub("_", str(text)).strip().rstrip(".")
        return cleaned or "untitled"

    @classmethod
    def _clean_markdown(cls, body):
        """Turn literal ``\\n`` sequences into newlines and drop empty anchors."""
        # A missing body (None) is treated as an empty document.
        text = (body or "").replace("\\n", "\n")
        return cls._ANCHOR_TAG.sub("", text)

    @classmethod
    def _style_html(cls, body_html):
        """Insert the stylesheet link after each ``<!doctype html>`` marker."""
        return (body_html or "").replace('<!doctype html>', cls._HTML_HEAD)

    def login(self):
        """Fetch the token owner's profile and remember login, id and name.

        Raises:
            ValueError: if the ``/user`` endpoint does not answer with 200.
        """
        response = self._get('/user')
        if response.status_code != 200:
            raise ValueError("Token Error!")
        user = response.json()['data']
        self.login_id = user['login']
        self.uid = user['id']
        self.username = user['name']
        print(f"{self.username} Login Success!")

    def getRepos(self):
        """Return ``[{"id": ..., "name": ...}, ...]`` for the user's repos.

        Requires :meth:`login` to have set ``self.login_id``.
        """
        payload = self._get('/users/' + self.login_id + '/repos').json()
        return [{"id": item['id'], "name": item['name']} for item in payload['data']]

    def getDocs(self, reposList):
        """Yield ``(content, repo_name, title)`` for every doc in ``reposList``.

        ``reposList`` is the list returned by :meth:`getRepos`. All repos are
        listed first, then each document is fetched individually. ``content``
        is the cleaned Markdown body when ``doc_type`` is 0, otherwise the
        HTML body with a stylesheet link injected.
        """
        listDocs = []
        # Collect the summary of every document in every repo.
        for repos in reposList:
            payload = self._get('/repos/' + str(repos['id']) + '/docs').json()
            for item in payload['data']:
                listDocs.append(
                    {
                        "id": repos['id'],
                        "title": item['title'],
                        "description": item['description'],
                        "slug": item['slug'],
                        "name": repos["name"],
                    }
                )
        # Fetch each document: GET /repos/:namespace/docs/:slug
        for item in listDocs:
            details = self._get('/repos/' + str(item['id']) + '/docs/' + item['slug']).json()
            if self.doc_type == 0:
                docsData = self._clean_markdown(details['data']['body'])
            else:
                docsData = self._style_html(details['data']['body_html'])
            yield docsData, item["name"], item['title']

    def saveDocs(self, data, name, title):
        """Write one document to ``<data_path>/<format>/<name>/<title>.<ext>``.

        Args:
            data: document content (Markdown or HTML text).
            name: repo name, used as the folder name.
            title: document title, used as the file name.

        ``name`` and ``title`` are sanitised so that characters such as ``/``
        or ``:`` cannot break the path. An existing file is replaced. A
        failure while writing is reported on stdout and does not abort the
        export.

        Raises:
            ValueError: if ``doc_type`` is unsupported, or an existing target
                file cannot be removed.
        """
        try:
            subdir, ext = self._EXPORT_LAYOUT[self.doc_type]
        except (KeyError, TypeError):
            raise ValueError(f"Error Type to Save! Type[{self.doc_type}]") from None
        saveFolder = f"{self.data_path}/{subdir}/{self._safe_name(name)}"
        filePath = f"{saveFolder}/{self._safe_name(title)}{ext}"
        os.makedirs(saveFolder, exist_ok=True)
        # Remove a stale export first so a locked file is reported clearly.
        if os.path.exists(filePath):
            try:
                os.remove(filePath)
            except OSError as err:
                raise ValueError(f"File [{filePath}] is occupied!") from err
        try:
            if self.doc_type == 2:
                # Render the HTML string to a PDF file.
                pdfkit.from_string(data, filePath, configuration=self.pdfkit_config,
                                   options=self.pdfkit_options)
            else:
                with open(filePath, 'w', encoding="utf-8") as fp:
                    fp.write(data)
        except Exception as err:
            # Best effort: report the reason and continue with the next doc.
            print(f"Save [{filePath}] Failed! ({err})")
        else:
            print(f"Save [{filePath}] Success!")

    def start(self):
        """Log in, list all repos and save every document they contain."""
        self.login()
        reposList = self.getRepos()
        for docsData, name, title in self.getDocs(reposList):
            self.saveDocs(docsData, name, title)
if __name__ == "__main__":
    # Run the full export only when the file is executed as a script.
    CExportYuQueDocs().start()