import os
import glob
# Byte-order-mark (BOM) byte sequences, one per Unicode encoding form.
BOM_UTF8 = b'\xef\xbb\xbf'  # UTF-8

BOM_UTF16_LE = b'\xff\xfe'  # UTF-16, little endian
BOM_LE = BOM_UTF16_LE  # short alias

BOM_UTF16_BE = b'\xfe\xff'  # UTF-16, big endian
BOM_BE = BOM_UTF16_BE  # short alias

BOM_UTF32_LE = b'\xff\xfe\x00\x00'  # UTF-32, little endian
BOM_UTF32_BE = b'\x00\x00\xfe\xff'  # UTF-32, big endian
def ReadAllTextFromFile(filepath, encoding='utf-8'):
    """Read a whole text file and return its content as one string.

    The returned text is normalized in two ways:

    1. A leading byte-order-mark character (U+FEFF, or its byte-swapped
       form U+FFFE) is removed.
    2. Line endings are unified to a single line-feed character.

    Args:
        filepath: Path of the file to read.
        encoding: Text encoding used to decode the file.

    Returns:
        The decoded text. An empty string is returned for an empty file and
        also when the file cannot be read; in the latter case the error is
        printed rather than raised.
    """
    try:
        # Plain read mode: 'r+' would additionally demand write permission
        # and therefore fail on read-only files. The default
        # universal-newlines mode already translates '\r\n' (and a lone
        # '\r') to '\n' while reading, so no manual replace is needed.
        with open(filepath, 'r', encoding=encoding) as f:
            text = f.read()
    except Exception as ex:
        # Deliberate best-effort behavior kept for existing callers:
        # report the problem and hand back an empty string.
        print(ex)
        return ""
    # startswith() is safe on an empty string, unlike indexing text[0].
    # A third check against 0xFFFE0000 was dropped: no character can have
    # a code point above 0x10FFFF, so it could never match.
    if text.startswith(('\ufeff', '\ufffe')):
        return text[1:]
    return text
def ReadAllLinesFromFile(filepath, encoding='utf-8'):
    """Return the content of a text file as a list of lines.

    The text is loaded through ReadAllTextFromFile and split with
    str.splitlines(), so the returned lines carry no line-break characters.
    """
    return ReadAllTextFromFile(filepath, encoding).splitlines()
def WriteAllTextToFile(filepath, content, encoding='utf-8'):
    """Overwrite the file at *filepath* with *content*.

    The file is created if missing and truncated otherwise.

    Returns:
        The number of characters written, as reported by the file object.
    """
    with open(filepath, mode='w+', encoding=encoding) as out:
        written = out.write(content)
    return written
def AppendTextToFile(filepath, content, encoding='utf-8'):
    """Append *content* to the end of the file at *filepath*.

    The file is created if it does not exist yet.

    Returns:
        The number of characters written, as reported by the file object.
    """
    with open(filepath, mode='a+', encoding=encoding) as out:
        written = out.write(content)
    return written
def ReadBinaryFile(filepath):
    """Return the complete raw content of the file at *filepath* as bytes."""
    with open(filepath, 'rb') as stream:
        return stream.read()
def DeleteDir(dir):
    """Delete a directory together with all files and sub-directories in it.

    Only *dir* itself and what lies below it are removed; parent
    directories are left alone even if they end up empty.

    Args:
        dir: Path of the directory to delete. A path that does not exist
            is silently ignored.

    Raises:
        OSError: If *dir* exists but cannot be removed (for example it is
            not a directory, or permissions are insufficient).
    """
    # Imported locally so this function does not depend on the module's
    # top-level import list.
    import shutil

    if not os.path.exists(dir):
        return
    # shutil.rmtree stops at `dir`. The previous os.removedirs() call also
    # pruned every empty ancestor directory above it, and the manual walk
    # followed directory symlinks into their targets.
    shutil.rmtree(dir)
# Lower-case file extensions (with leading dot) accepted as images by
# GetImagesByPath.
img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.dng']
# Video file extensions; not referenced by the code visible in this part of
# the file.
vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv']


def GetImagesByPath(sourceImgDir):
    """Return the sorted paths of the image files directly inside a directory.

    A path counts as an image when its extension, compared
    case-insensitively, is listed in ``img_formats``. Sub-directories are
    not searched.

    Args:
        sourceImgDir: Directory to scan.

    Returns:
        A sorted list of matching paths, each prefixed with *sourceImgDir*;
        empty when nothing matches.
    """
    # Escape the directory part so glob metacharacters in its name
    # ('[', '*', '?') are matched literally instead of acting as a pattern.
    pattern = os.path.join(glob.escape(sourceImgDir), '*.*')
    return [
        path for path in sorted(glob.glob(pattern))
        if os.path.splitext(path)[-1].lower() in img_formats
    ]
def 标注名替换(src, dst, datasetRootDir="E:\\www\\ai\\static\\dataset\\非厨余垃圾"):
    """Rename a label in every annotation file of a dataset.

    Annotation files are searched exactly one directory level below
    *datasetRootDir*: first ``*.tag.txt`` files and, only when none are
    found, ``*.json`` files. In each file the exact substrings
    ``"labelName":"<src>"`` and ``"labelname":"<src>"`` are replaced by the
    same key with ``<dst>`` as the value. A summary line is printed at the
    end.

    Args:
        src: Label name to look for.
        dst: Label name to write instead.
        datasetRootDir: Root directory of the dataset. Defaults to the
            path that used to be hard-coded in this function.
    """
    # Escape the root so glob metacharacters in it are taken literally.
    root = glob.escape(datasetRootDir)
    # First '*' is the sub-directory, second '*' is the file name.
    label_files = glob.glob(os.path.join(root, '*', '*.tag.txt'))
    if not label_files:
        label_files = glob.glob(os.path.join(root, '*', '*.json'))
    for labelfile in label_files:
        original = ReadAllTextFromFile(labelfile)
        updated = original.replace('"labelName":"%s"' % (src), '"labelName":"%s"' % (dst))
        updated = updated.replace('"labelname":"%s"' % (src), '"labelname":"%s"' % (dst))
        # Write only when something was replaced. ReadAllTextFromFile
        # returns "" when a file cannot be read, and writing that back
        # would wipe the file.
        if updated != original:
            WriteAllTextToFile(labelfile, updated)
    print("替换完毕", src, "=>", dst)
# 标注名替换("easycan","易拉罐")
# 标注名替换("plasticbag","塑料袋")
# 标注名替换("plasticbox","塑料箱")
# 标注名替换("textilebag","纺织袋")
# 标注名替换("foam","泡沫")
# 标注名替换("fruitfoam","果沫")
# 标注名替换("paperbox","纸盒")
# 标注名替换("milkbox","牛奶箱")
# 标注名替换("papernapkin","餐巾纸")
# 标注名替换("glassbottle","玻璃瓶")
# 标注名替换("bottle","瓶子")
# 标注名替换("啤酒瓶","瓶子")
# 标注名替换("container","容器")
# 标注名替换("cup","杯子")
# 标注名替换("塑料瓶","瓶子")
# 标注名替换("尼龙袋","纺织袋")
# 标注名替换("塑料箱","塑料盒")
# 标注名替换("牛奶箱","牛奶盒")
if __name__ == "__main__":
    # Run the rename only when this file is executed as a script, so that
    # importing it for its helper functions does not rewrite any
    # annotation files as a side effect.
    标注名替换("易拉罐","罐子")
# NOTE: adjust the 标注名替换(src, dst) function to your needs before using it.