markdown中图片转base64_markdown base64-CSDN博客

本文链接：https://blog.csdn.net/gogoingstudy/article/details/119064231

python 转换图片为base64

图片转为base64的原理很简单，基本上哪种语言都可以做到。

写本文的原因是：在CSDN中导入markdown文件时，会出现图片导入失败或者无法导入的问题。

解决办法是:把markdown中的所有图片转为base64即可¹.

// 转为base64 之后这样写即可展示出图片
![image-name][image-id]

[image-id]:图片base64编码

把markdown文件中的图片转为base64的步骤如下:

1.读取markdown文件,并用正则查找出所有的图片标签
2.替换图片标签![image-name](url) 为 ![image-name][image-id]
3.在md文件后面追加上base64编码 [image-id]:图片base64编码

完整代码如下:

import os
import base64
import re

images_suffix = ["gif", "png", "jpg", "jpeg"]


# Convert every image found in a folder to base64
def image_to_base64(filepath, gen_name):
    """Dump every image in ``filepath`` as a base64 data URI into ``image64/<gen_name>``.

    Only files whose extension (the text after the last dot) is listed in
    ``images_suffix`` are converted; everything else is skipped.  The data
    URIs are written back to back, with no separator between them.

    Args:
        filepath: Directory to scan (not recursive).
        gen_name: Name of the output file, created under ``image64/`` in the
            current working directory.
    """
    filelist = os.listdir(filepath)
    # Create the output directory up front so open() below cannot fail on it.
    os.makedirs('image64', exist_ok=True)
    with open(os.path.join('image64', gen_name), 'wt', encoding='utf-8') as gen:
        for filename in filelist:
            # rpartition splits on the LAST dot, so "a.b.png" yields "png";
            # an empty separator means the name has no extension at all.
            _, dot, suffix = filename.rpartition('.')
            if not dot or suffix not in images_suffix:
                continue

            with open(os.path.join(filepath, filename), 'rb') as f:
                # b64encode returns ASCII-only bytes, so decoding is lossless.
                encoded = base64.b64encode(f.read()).decode('ascii')

            gen.write('data:image/' + suffix + ';base64,' + encoded)


# 转换一张图片为base64
def one_image_to_base64(filepath, img_type):
    """Return the image at ``filepath`` as a ``data:image/...;base64,`` URI.

    Args:
        filepath: Path of the image file to read.
        img_type: Image extension, with or without the leading dot
            (``'.png'`` and ``'png'`` both give ``data:image/png``).

    Returns:
        The data URI string containing the base64-encoded file contents.
    """
    with open(filepath, 'rb') as f:
        # b64encode returns ASCII-only bytes; decode them directly instead of
        # slicing the b'...' wrapper off the bytes repr.
        encoded = base64.b64encode(f.read()).decode('ascii')

    # Drop a single leading dot only if present, so a bare extension is not
    # truncated.
    subtype = img_type[1:] if img_type.startswith('.') else img_type
    return 'data:image/' + subtype + ';base64,' + encoded

# 正则读取md文件中的图片url,并转换为[base64-image-id]
def transform(md_file_path, name):
    """Rewrite inline ``.assets`` images of a markdown file as reference-style images.

    Every ``![alt](<dir>.assets/<img_name><ext>)`` tag (``ext`` being one of
    .jpg/.png/.gif/.jpeg, separator ``/`` or ``\\``) becomes
    ``![alt][<img_name>]``.  The rewritten text is saved next to the original
    as ``copy_<name>``; the original file is left untouched.

    Alt text containing ``]`` and paths containing parentheses are not matched.

    Args:
        md_file_path: Directory that contains the markdown file.
        name: File name of the markdown file, including the ``.md`` extension.

    Returns:
        A list with one dict per matched tag, in document order, with keys
        ``source`` (the full tag), ``img_path`` (the parenthesised path,
        parentheses included), ``img_name`` and ``img_type`` (extension with
        its leading dot).
    """
    # Negated character classes keep each match inside a single image tag;
    # a greedy ".*" would swallow several tags that share one line.
    pattern = re.compile(
        r'!\[[^\]\n]*\]'
        r'(\([^()\n]*?\.assets[/\\]([^()\n]+?)(\.jpg|\.png|\.gif|\.jpeg)\))'
    )
    arr = []

    def _to_reference(mm):
        # Record the match and return the reference-style replacement.
        source = mm.group(0)
        arr.append({'source': source, 'img_path': mm.group(1),
                    'img_name': mm.group(2), 'img_type': mm.group(3)})
        print(source)
        # Group 1 runs to the end of the match, so keep everything before it
        # ("![alt]") and append the reference id.
        return source[:mm.start(1) - mm.start(0)] + '[' + mm.group(2) + ']'

    with open(os.path.join(md_file_path, name), 'r', encoding='utf-8') as f:
        read = f.read()

    read = pattern.sub(_to_reference, read)

    with open(os.path.join(md_file_path, 'copy_' + name), 'w', encoding='utf-8') as ff:
        ff.write(read)
    return arr


def md_transform_img(file_path_pre, md_file_name):
    """Produce ``copy_<md_file_name>.md`` with its ``.assets`` images embedded as base64.

    Calls ``transform`` to rewrite the image tags into reference style, then
    appends one ``[img_name]: data:image/...;base64,...`` definition per
    distinct image name to the end of the copy.

    Args:
        file_path_pre: Directory containing the markdown file; a trailing
            path separator is accepted but not required.
        md_file_name: Markdown file name without the ``.md`` extension.
    """
    arr = transform(file_path_pre, md_file_name + '.md')
    copy_path = os.path.join(file_path_pre, 'copy_' + md_file_name + '.md')
    written = set()
    with open(copy_path, 'a', encoding='utf-8') as w:
        for item in arr:
            # An image used several times needs only one definition.
            if item['img_name'] in written:
                continue
            written.add(item['img_name'])

            # img_path still carries the surrounding parentheses of the tag;
            # strip them and normalise both separator styles to this OS.
            rel_path = item['img_path'].replace("(", "").replace(")", "")
            rel_path = rel_path.replace('\\', '/').replace('/', os.sep)
            img_base64_str = one_image_to_base64(
                os.path.join(file_path_pre, rel_path), item['img_type'])
            w.write('\n')
            w.write('[' + item['img_name'] + ']: ' + img_base64_str)
    print('完成!!!')

if __name__ == '__main__':
    # Example run: the images referenced by the markdown file are expected
    # under "<markdown file name>.assets" inside the same directory.
    md_transform_img(
        # Directory that holds the markdown file
        file_path_pre=r'C:\Users\Administrator\Desktop\工作文件夹\公众号文档\\',
        # Markdown file name, without the ".md" extension
        md_file_name='1.python自动化登录12306',
    )