Python 批量替换文本文件内容

import os
import glob 

 


# Byte-order-mark (BOM) signatures for the common Unicode encodings.

# UTF-8 signature
BOM_UTF8 = bytes((0xEF, 0xBB, 0xBF))

# UTF-16, little endian (BOM_LE is a short alias)
BOM_UTF16_LE = bytes((0xFF, 0xFE))
BOM_LE = BOM_UTF16_LE

# UTF-16, big endian (BOM_BE is a short alias)
BOM_UTF16_BE = bytes((0xFE, 0xFF))
BOM_BE = BOM_UTF16_BE

# UTF-32, little endian
BOM_UTF32_LE = bytes((0xFF, 0xFE, 0x00, 0x00))

# UTF-32, big endian
BOM_UTF32_BE = bytes((0x00, 0x00, 0xFE, 0xFF))

def ReadAllTextFromFile(filepath,encoding='utf-8'):
    """Read a text file and return its content as a normalized string.

    Normalization applied to the decoded text:
      1. CRLF line endings are collapsed to a single LF.
      2. One leading byte-order-mark character is dropped, either
         U+FEFF (65279) or its byte-swapped form U+FFFE (65534).

    Args:
        filepath: Path of the file to read.
        encoding: Text encoding used to decode the file (default UTF-8).

    Returns:
        The normalized text. An empty string is returned when the file is
        empty, or when it cannot be opened or decoded; in the failure case
        the exception is printed instead of being raised.
    """
    try:
        # Read-only mode: nothing is written here, so files without write
        # permission must still be readable.
        with open(filepath, 'r', encoding=encoding) as f:
            s = f.read().replace('\r\n', '\n')
    except Exception as ex:
        # Deliberate best-effort contract: report the problem and hand back
        # an empty string rather than propagating the error to the caller.
        print(ex)
        return ""

    # startswith() is safe on an empty string, unlike indexing s[0].
    if s.startswith(('\ufeff', '\ufffe')):
        return s[1:]
    return s

def ReadAllLinesFromFile(filepath,encoding='utf-8'):
    """Return the lines of a text file as a list of strings.

    The file is loaded through ReadAllTextFromFile and then split with
    str.splitlines(), so the returned lines carry no line terminators.

    Args:
        filepath: Path of the file to read.
        encoding: Text encoding passed on to ReadAllTextFromFile.

    Returns:
        A list with one string per line.
    """
    return ReadAllTextFromFile(filepath, encoding).splitlines()

def WriteAllTextToFile(filepath,content,encoding='utf-8'):
    """Replace the content of a text file with ``content``.

    The file is created if missing and truncated if it already exists.

    Args:
        filepath: Path of the file to write.
        content: Text to store in the file.
        encoding: Text encoding used when writing (default UTF-8).

    Returns:
        The value returned by the file object's write() call, i.e. the
        number of characters written.
    """
    with open(filepath, mode='w+', encoding=encoding) as stream:
        written = stream.write(content)
    return written

def AppendTextToFile(filepath,content,encoding='utf-8'):
    """Append ``content`` to the end of a text file.

    The file is created if it does not exist yet; existing content is kept.

    Args:
        filepath: Path of the file to append to.
        content: Text to add at the end of the file.
        encoding: Text encoding used when writing (default UTF-8).

    Returns:
        The value returned by the file object's write() call, i.e. the
        number of characters written.
    """
    with open(filepath, mode='a+', encoding=encoding) as stream:
        appended = stream.write(content)
    return appended

def ReadBinaryFile(filepath):
    """Read a file in binary mode and return its entire content.

    Args:
        filepath: Path of the file to read.

    Returns:
        The raw file content as a bytes object.
    """
    with open(filepath, mode='rb') as stream:
        return stream.read()

def DeleteDir(dir):
    """Delete a directory together with all files and sub-directories in it.

    Only ``dir`` and its contents are removed. Parent directories are never
    touched, even when removing ``dir`` leaves them empty. A path that does
    not exist is ignored.

    Args:
        dir: Path of the directory to delete. (The name shadows the builtin
            ``dir`` but is kept so keyword callers continue to work.)

    Raises:
        OSError: If the tree cannot be removed, e.g. because of missing
            permissions or because ``dir`` is not a real directory.
    """
    import shutil  # function-scope import keeps this block self-contained

    if not os.path.exists(dir):
        return

    # shutil.rmtree removes exactly this tree. os.removedirs must not be used
    # here: after deleting the leaf it keeps pruning every empty ancestor
    # directory, which deletes folders outside the requested tree.
    shutil.rmtree(dir)



# Lower-case file extensions treated as images / videos.
img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.dng']
vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv']


def GetImagesByPath(sourceImgDir):
    """List the image files located directly inside a directory.

    Entries are matched with the glob pattern ``*.*`` (no recursion), sorted
    by path, and kept when their extension, compared case-insensitively, is
    one of ``img_formats``.

    Args:
        sourceImgDir: Directory to look in.

    Returns:
        A sorted list of matching paths, each prefixed with ``sourceImgDir``.
    """
    pattern = os.path.join(sourceImgDir, '*.*')
    images = []
    for candidate in sorted(glob.glob(pattern)):
        _, extension = os.path.splitext(candidate)
        if extension.lower() in img_formats:
            images.append(candidate)
    return images




def 标注名替换(src,dst,datasetRootDir="E:\\www\\ai\\static\\dataset\\非厨余垃圾"):
    """Rename a label in every annotation file of a dataset.

    Annotation files are looked up one level below ``datasetRootDir``
    (``<root>/<sub-folder>/<file>``). Files ending in ``.tag.txt`` are used
    when any exist; otherwise ``.json`` files are used. In each file the
    exact texts ``"labelName":"<src>"`` and ``"labelname":"<src>"`` are
    replaced by the same key with ``<dst>`` as the value. This is a plain
    text substitution, so entries written with whitespace around the colon
    are not matched.

    A file is written back only when its text actually changed. This leaves
    untouched files alone and, in particular, prevents a file that could not
    be read (ReadAllTextFromFile then yields an empty string) from being
    overwritten with empty content.

    Args:
        src: Current label name.
        dst: New label name.
        datasetRootDir: Dataset root folder that contains the sub-folders
            with annotation files. Defaults to the previously hard-coded path.
    """
    # First '*' matches the sub-folder, second pattern matches the file name.
    label_files = glob.glob(os.path.join(datasetRootDir, '*', '*.tag.txt'))
    if not label_files:
        label_files = glob.glob(os.path.join(datasetRootDir, '*', '*.json'))

    # Both spellings of the key occur in the annotation files.
    replacements = [
        ('"labelName":"%s"' % src, '"labelName":"%s"' % dst),
        ('"labelname":"%s"' % src, '"labelname":"%s"' % dst),
    ]

    for labelfile in label_files:
        original = ReadAllTextFromFile(labelfile)
        updated = original
        for old, new in replacements:
            updated = updated.replace(old, new)

        # Skip the write when nothing was replaced (or nothing could be read).
        if updated != original:
            WriteAllTextToFile(labelfile, updated)
    print("替换完毕",src,"=>",dst)

# Script entry: label renames to apply to the dataset.
# The commented-out calls below are other rename pairs kept for reference
# (presumably already applied in earlier runs -- confirm before re-enabling).
# 标注名替换("easycan","易拉罐")
# 标注名替换("plasticbag","塑料袋")
# 标注名替换("plasticbox","塑料箱")
# 标注名替换("textilebag","纺织袋")
# 标注名替换("foam","泡沫")
# 标注名替换("fruitfoam","果沫")
# 标注名替换("paperbox","纸盒")
# 标注名替换("milkbox","牛奶箱")
# 标注名替换("papernapkin","餐巾纸")
# 标注名替换("glassbottle","玻璃瓶")
# 标注名替换("bottle","瓶子")
# 标注名替换("啤酒瓶","瓶子")
# 标注名替换("container","容器")
# 标注名替换("cup","杯子")
# 标注名替换("塑料瓶","瓶子")
# 标注名替换("尼龙袋","纺织袋")
# 标注名替换("塑料箱","塑料盒")
# 标注名替换("牛奶箱","牛奶盒")
# Active call: runs at module load and renames the label "易拉罐" to "罐子"
# in the annotation files found by 标注名替换.
标注名替换("易拉罐","罐子")

使用的时候要修改 标注名替换(src,dst) 这个函数（例如其中的数据集根目录 datasetRootDir），然后在文件末尾用需要的 src、dst 调用它。

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值