批量将语雀导出的.md文件中的图片转到本地

Gapaus

已于 2023-02-10 10:10:39 修改

阅读量904

点赞数

文章标签：语雀 python

于 2023-02-09 10:06:11 首次发布

本文链接：https://blog.csdn.net/zxhy_/article/details/128947867

版权

该Python脚本用于从Markdown文件中提取并下载语雀平台的图片，同时将图片链接替换为本地路径。支持两种图片重命名模式，并可自定义图片存储目录和链接前缀。脚本读取.md文件，找到http(s)://开头且包含.png或.jpeg的URL，下载图片并替换原始链接。

摘要由CSDN通过智能技术生成

参考：https://blog.csdn.net/lemon_TT/article/details/128380655
加入了批量处理.md文件功能
注：不要使用 asc（递增编号）模式——每个 .md 文件的图片编号都从 0 重新开始，批量处理或多次导入时，同名图片（如 image-0.png）会互相覆盖。

import re
import requests
import os
import sys

# Hostname of Yuque's image CDN; available for restricting downloads to
# Yuque-hosted images (the script does not enforce this filter by default).
yuque_cdn_domain = 'cdn.nlark.com'
# Filename prefix for images saved in 'asc' (sequential) rename mode,
# producing names such as "image-0.png".
image_file_prefix = 'image-'


def deal_yuque(origin_md_path, output_md_path, image_dir, image_url_prefix, image_rename_mode):
    """Download the remote images referenced by a Markdown file and relink them.

    Every ``http(s)://`` URL whose path ends in ``.png`` or ``.jpeg`` is
    downloaded into ``image_dir`` through ``download_image`` and replaced in
    the text by a local link. A trailing ``#fragment`` on the URL is dropped.
    All other text, including non-image links, is written through unchanged,
    and any number of images per line is supported.

    Args:
        origin_md_path: Path of the Markdown file to read.
        output_md_path: Path of the Markdown file to write (may be the same
            file; the input is fully read before the output is opened).
        image_dir: Directory the downloaded images are saved to.
        image_url_prefix: Text placed in front of the local file name to form
            the new link (empty string, a path, or a CDN address).
        image_rename_mode: ``'asc'`` names images ``image-<n><suffix>`` with
            ``n`` counting from 0 within this file; any other value keeps the
            last path segment of the original URL.

    Returns:
        The number of image links that were rewritten.

    Note:
        In ``'asc'`` mode the counter restarts at 0 on every call, so files
        sharing one ``image_dir`` will overwrite each other's images.
    """
    # A URL stops at whitespace, brackets, parentheses, angle brackets or
    # quotes, so the delimiters of "![alt](url)" are never captured.
    url_pattern = re.compile(r'https?://[^\s()\[\]<>\'"]+')
    image_suffixes = ('.png', '.jpeg')
    idx = 0

    def relink(match):
        """Download one matched URL if it is an image; return its replacement."""
        nonlocal idx
        matched = match.group(0)
        # The fragment is never sent to the server, so it is not part of the
        # resource address and must not end up in the local file name.
        image_url = matched.split('#', 1)[0]
        url_path = image_url.split('?', 1)[0].lower()
        suffix = next((s for s in image_suffixes if url_path.endswith(s)), None)
        # To download only Yuque-hosted images, additionally require
        # `yuque_cdn_domain in image_url` here.
        if suffix is None:
            return matched
        download_image(image_url, image_dir, image_rename_mode, idx, suffix)
        if image_rename_mode == 'asc':
            local_name = image_file_prefix + str(idx) + suffix
        else:
            # Must match the name download_image derives from the same URL.
            local_name = image_url.split('/')[-1]
        idx += 1
        return image_url_prefix + local_name

    output_content = []
    with open(origin_md_path, 'r', encoding='utf-8', errors='ignore') as f:
        for line in f:
            output_content.append(url_pattern.sub(relink, line))
    with open(output_md_path, 'w', encoding='utf-8', errors='ignore') as f:
        f.writelines(output_content)
    return idx


def download_image(image_url, image_dir, image_name_mode, idx, suffix):
    """Fetch one image over HTTP and save it inside ``image_dir``.

    Args:
        image_url: Address of the image to download.
        image_dir: Directory the image file is written to.
        image_name_mode: ``'asc'`` saves the file as ``image-<idx><suffix>``;
            any other value keeps the last path segment of ``image_url``.
        idx: Sequence number used for the file name in ``'asc'`` mode.
        suffix: File extension (including the dot) used in ``'asc'`` mode.

    Nothing is written when the server does not answer with HTTP 200; a
    message is printed instead. Network errors raised by ``requests``
    (including timeouts) propagate to the caller.
    """
    if image_name_mode == 'asc':
        image_name = image_file_prefix + str(idx) + suffix
    else:
        image_name = image_url.split('/')[-1]
    # Without a timeout a stalled server would block the whole batch forever.
    r = requests.get(image_url, timeout=30)
    if r.status_code != 200:
        print('图片下载失败(HTTP {}): {}'.format(r.status_code, image_url))
        return
    # The context manager guarantees the file is flushed and closed.
    with open(os.path.join(image_dir, image_name), 'wb') as f:
        f.write(r.content)


def mkdir(image_dir):
    """Ensure the image directory exists and return its cleaned-up path.

    Surrounding whitespace and trailing backslashes are removed from
    ``image_dir`` first. The directory (with any missing parents) is created
    when the path does not exist yet; a status message is printed either way.
    """
    target = image_dir.strip().rstrip("\\")
    if not os.path.exists(target):
        os.makedirs(target)
        print('图片存储目录创建成功')
        return target
    print('图片存储目录已存在')
    return target


# Batch-process every Markdown note in one folder.
def main():
    """Localize the images of all ``.md`` files in the notebook folder.

    Settings (edit the assignments below):
        origin_md_path: Folder containing the input Markdown files.
        output_md_path: Folder the rewritten Markdown files are written to.
        image_dir: Folder the downloaded images are stored in.
        image_url_prefix: Prefix of the rewritten image links; an empty
            string, a path, or a CDN address.
        image_rename_mode: ``'raw'`` keeps the original (uuid) file name,
            ``'asc'`` renames images to an increasing sequence.

    Expected folder layout:
        notebook
        |--mynote1.md
        |--mynote2.md
    """
    origin_md_path = 'C:/Users/xxx/Desktop/notebook/'
    output_md_path = 'C:/Users/xxx/Desktop/notebook/'
    image_dir = output_md_path + 'img/'
    image_url_prefix = '/img/'  # link prefix written into the .md files
    # 'raw' is the safe default for batch runs: 'asc' numbering restarts at 0
    # for every note, so notes sharing one img folder would overwrite each
    # other's image-0, image-1, ... files.
    image_rename_mode = 'raw'  # raw asc
    mkdir(image_dir)  # create the img folder next to the notes
    for name in sorted(os.listdir(origin_md_path)):
        source = os.path.join(origin_md_path, name)
        # Only regular files with a .md extension are notes.
        if not name.endswith('.md') or not os.path.isfile(source):
            continue
        cnt = deal_yuque(source, os.path.join(output_md_path, name), image_dir, image_url_prefix,
                         image_rename_mode)
        print('处理完成, 共{}张图片'.format(cnt))


# Run the batch conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()