# 参考:https://blog.csdn.net/lemon_TT/article/details/128380655
# 在参考脚本的基础上加入了批量处理 .md 文件的功能
# 注:不要使用 asc 编号模式,多次导入时有 bug
import re
import requests
import os
import sys
# Host of Yuque's image CDN; available for restricting downloads to
# Yuque-hosted images (no active code in this file uses it).
yuque_cdn_domain = 'cdn.nlark.com'
# File-name prefix for images saved in 'asc' rename mode; the running index
# and the file suffix are appended to it.
image_file_prefix = 'image-'
# Image extensions that are downloaded and re-linked (matched case-insensitively).
_IMAGE_SUFFIXES = ('.png', '.jpg', '.jpeg', '.gif')

# Group 1: a URL up to (but excluding) its fragment. Group 2: optional "#fragment".
# Markdown/HTML delimiters are excluded from both groups so the closing ")" of
# "![alt](url)" or the closing quote of <img src="url"> is never swallowed.
_URL_PATTERN = re.compile(r'(https?://[^\s()\[\]<>\'"#]+)(#[^\s()\[\]<>\'"]*)?')


def deal_yuque(origin_md_path, output_md_path, image_dir, image_url_prefix, image_rename_mode):
    """Download the remote images of one Markdown file and re-link them locally.

    Every http(s) URL whose path ends in one of ``_IMAGE_SUFFIXES`` is passed
    to ``download_image`` and the link (including any ``#fragment`` that Yuque
    appends) is replaced by ``image_url_prefix`` + local file name.  All other
    text, including non-image URLs, is written through unchanged.

    Args:
        origin_md_path: Path of the Markdown file to read.
        output_md_path: Path of the Markdown file to write; may be the same
            file as ``origin_md_path`` because the input is read completely
            before the output is opened.
        image_dir: Directory the images are saved into.
        image_url_prefix: Text placed in front of the local file name in the
            rewritten link (empty string, a path, or a CDN address).
        image_rename_mode: ``'asc'`` names images ``image-<n><suffix>``; any
            other value keeps the last path segment of the URL as the name.

    Returns:
        The number of image links that were downloaded and rewritten.
    """
    idx = 0    # next candidate number for 'asc' file names
    count = 0  # image links successfully localised in this file

    def localize(match):
        """Return the replacement text for one URL match."""
        nonlocal idx, count
        image_url = match.group(1)
        lowered = image_url.lower()
        suffix = next((s for s in _IMAGE_SUFFIXES if lowered.endswith(s)), None)
        if suffix is None:
            # Not an image link: keep the URL and its fragment untouched.
            return match.group(0)
        # To download only Yuque-hosted images, additionally require
        # `yuque_cdn_domain in image_url` here.

        if image_rename_mode == 'asc':
            # Numbering restarts at 0 for every Markdown file, so skip numbers
            # already present in image_dir; otherwise a second file processed
            # into the same directory would overwrite the first file's images.
            while os.path.exists(os.path.join(image_dir, image_file_prefix + str(idx) + suffix)):
                idx += 1
            image_name = image_file_prefix + str(idx) + suffix
        else:
            image_name = image_url.split('/')[-1]

        download_image(image_url, image_dir, image_rename_mode, idx, suffix)
        if not os.path.exists(os.path.join(image_dir, image_name)):
            # Nothing was saved (e.g. non-200 response): keep the remote link
            # rather than pointing the document at a missing file.
            return match.group(0)

        idx += 1
        count += 1
        return image_url_prefix + image_name

    output_content = []
    with open(origin_md_path, 'r', encoding='utf-8', errors='ignore') as f:
        for line in f:
            output_content.append(_URL_PATTERN.sub(localize, line))

    with open(output_md_path, 'w', encoding='utf-8', errors='ignore') as f:
        f.writelines(output_content)
    return count
def download_image(image_url, image_dir, image_name_mode, idx, suffix, timeout=30):
    """Download one image into ``image_dir``.

    Args:
        image_url: URL of the image to fetch.
        image_dir: Directory the file is written into.
        image_name_mode: ``'asc'`` saves the file as
            ``image_file_prefix + str(idx) + suffix``; any other value keeps
            the last path segment of ``image_url`` as the file name.
        idx: Running number used for the file name in ``'asc'`` mode.
        suffix: File extension (including the dot) used in ``'asc'`` mode.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole batch.

    Returns:
        True if the server answered 200 and the file was written, else False.

    Raises:
        requests.RequestException: On connection errors or timeouts.  These
            are deliberately not swallowed so that a failed run aborts before
            the caller rewrites any Markdown file.
    """
    if image_name_mode == 'asc':
        image_name = image_file_prefix + str(idx) + suffix
    else:
        image_name = image_url.split('/')[-1]

    # No stream=True: the body is read in full via .content anyway.
    r = requests.get(image_url, timeout=timeout)
    if r.status_code != 200:
        return False

    # Context manager guarantees the handle is flushed and closed.
    with open(os.path.join(image_dir, image_name), 'wb') as f:
        f.write(r.content)
    return True
def mkdir(image_dir):
    """Ensure the image directory exists and return its normalised path.

    Surrounding whitespace and trailing backslashes are removed from
    ``image_dir`` before the check.  A status message is printed telling
    whether the directory was already there or has just been created.
    """
    target = image_dir.strip().rstrip("\\")
    if not os.path.exists(target):
        os.makedirs(target)
        print('图片存储目录创建成功')
    else:
        print('图片存储目录已存在')
    return target
# Batch processing of every Markdown file in one folder
def main(origin_md_path='C:/Users/xxx/Desktop/notebook/',
         output_md_path='C:/Users/xxx/Desktop/notebook/',
         image_url_prefix='/img/',
         image_rename_mode='asc'):
    """Localise the images of every ``.md`` file directly inside a folder.

    Images are stored in an ``img`` sub-folder of ``output_md_path``, which is
    created if necessary.  One summary line is printed per processed file.

    Args:
        origin_md_path: Folder containing the input Markdown files.
        output_md_path: Folder the rewritten Markdown files are written to
            (same file names as the inputs).
        image_url_prefix: Prefix of the rewritten image links inside the
            Markdown files: empty string, a path, or a CDN address.
        image_rename_mode: ``'raw'`` keeps the original file name from the
            URL, ``'asc'`` renames images with an increasing number.

    Expected folder layout::

        notebook
        |--mynote1.md
        |--mynote2.md
    """
    image_dir = os.path.join(output_md_path, 'img')
    mkdir(image_dir)  # create the img folder next to the output notes

    # Sorted so that processing order (and thus 'asc' numbering) is stable.
    for file_name in sorted(os.listdir(origin_md_path)):
        source_path = os.path.join(origin_md_path, file_name)
        # Only regular files ending in .md; a directory named "x.md" is skipped.
        if not (file_name.endswith('.md') and os.path.isfile(source_path)):
            continue
        cnt = deal_yuque(source_path, os.path.join(output_md_path, file_name),
                         image_dir, image_url_prefix, image_rename_mode)
        # Reported per file, so cnt is always bound when it is printed.
        print('处理完成, 共{}张图片'.format(cnt))
# Run the batch conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()