Python过滤重复的文字(字符)
第一步:新建FilterRepeatCharacter.py文件
- 过滤方法一:如果顺序不重要,可以使用 "".join(set(context))(结果中字符的顺序不固定)
- 过滤方法二:如果需要保持字符首次出现的顺序,可以使用 "".join(collections.OrderedDict.fromkeys(context))
代码示例:
# 去掉字符串中重复字符的方法
import sys
import os
from collections import OrderedDict
# context = "重复内容重复内容"
# 方法一:如果顺序不重要,你可以使用 set(context)
# print("".join(set(context)))
# 方法二:如果顺序很重要,你可以使用 collections.OrderedDict.fromkeys(context):
# print("".join(OrderedDict.fromkeys(context)))
# Directory to scan and name of the generated file. Each can be overridden
# from the command line (first and second argument); relative paths are fine.
targetdir = "..\\FontZip\\Font"
targetName = "FilterFontZip.txt"
if len(sys.argv) > 1:
    targetdir = sys.argv[1]
if len(sys.argv) > 2:
    # Read the second argument only when it was actually supplied; checking
    # just for "> 1" made a single-argument invocation fail with IndexError.
    targetName = sys.argv[2]
# print(targetdir, targetName)
def getfilepaths(suffixNameList, rootdir=None):
    """Collect the files directly inside a directory that have a wanted suffix.

    Only the top level of the directory is inspected; sub-directories are
    not descended into.

    Args:
        suffixNameList: Extensions to accept, each including its leading dot
            (for example ``['.json', '.txt']``). Matching is exact and
            case-sensitive.
        rootdir: Directory to scan. When omitted, the module-level
            ``targetdir`` is used.

    Returns:
        A list of paths (directory joined with file name) in the order
        reported by ``os.walk``. The list is empty when the directory does
        not exist or holds no matching file.
    """
    if rootdir is None:
        rootdir = targetdir
    # os.walk yields the top directory first; taking only that first entry
    # limits the scan to the top level. A missing directory yields nothing,
    # in which case the default tuple produces an empty result.
    root, _subdirs, files = next(os.walk(rootdir), (rootdir, [], []))
    return [
        os.path.join(root, name)
        for name in files
        if os.path.splitext(name)[1] in suffixNameList
    ]
def readfile(path, contentList):
    """Append the characters of a text file's distinct lines to a list.

    The file is read as UTF-8 first. If its bytes are not valid UTF-8 it is
    read again with the platform's default encoding.

    Args:
        path: Path of the text file to read. It is printed before reading.
        contentList: List that receives the characters; it is modified in
            place.

    Returns:
        The same ``contentList`` object, extended with every character of
        each distinct line (line endings included), in first-seen line order.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the fallback encoding cannot decode it either.
    """
    print(path)
    try:
        # Decoding happens while reading, not in open(), so the read itself
        # has to sit inside the try for the fallback to ever be reached.
        with open(path, mode="r", encoding="utf-8") as f:
            lines = f.readlines()
    except UnicodeDecodeError:
        with open(path, mode="r") as f:
            lines = f.readlines()
    # Drop repeated lines while keeping first-seen order, so the result does
    # not depend on per-run string hashing the way set() does.
    contentList.extend("".join(OrderedDict.fromkeys(lines)))
    return contentList
def readallfiles(filepaths):
    """Read every given file and gather the collected characters in one list.

    Args:
        filepaths: Iterable of file paths; each one is handed to ``readfile``.

    Returns:
        A new list that ``readfile`` has extended once per path, in the
        order the paths were given. Empty when no path is given.
    """
    merged = []
    for source in filepaths:
        # readfile extends the list it is given, so one shared accumulator
        # is enough.
        readfile(source, merged)
    return merged
def writefile(filepath, textlist):
    """Write the distinct characters of the given text to a UTF-8 file.

    Newlines, tabs and carriage returns are removed first. Every remaining
    character is written once, in the order it first appears, so the same
    input always produces the same file.

    Args:
        filepath: Path of the file to create or overwrite.
        textlist: Iterable of strings that are concatenated before filtering.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    texts = "".join(textlist).replace("\n", "").replace("\t", "").replace("\r", "")
    # Mode "w" already truncates an existing file, and the with-block closes
    # the handle even if the write fails.
    with open(filepath, mode="w", encoding="utf-8") as f:
        f.write("".join(OrderedDict.fromkeys(texts)))
def filterFont():
    """Merge the .json/.txt files of ``targetdir`` into one de-duplicated file.

    The matching files are read, their characters are collected, and the
    result is written to ``targetName`` inside ``targetdir``.
    """
    writePath = os.path.join(targetdir, targetName)
    outputKey = os.path.normcase(os.path.abspath(writePath))
    # The output file is written into the scanned directory and may itself
    # end in .txt. Skip it, otherwise a re-run reads the previous result as
    # input and characters removed from the sources are never dropped.
    filePaths = [
        path
        for path in getfilepaths(['.json', '.txt'])
        if os.path.normcase(os.path.abspath(path)) != outputKey
    ]
    mergeList = readallfiles(filePaths)
    writefile(writePath, mergeList)


if __name__ == '__main__':
    filterFont()
第二步:新建FilterRepeatCharacter.bat文件
代码示例
rem First argument: directory to scan (relative paths are supported). Second argument: name of the generated file, including its extension.
python FilterRepeatCharacter.py ..\\FontZip\\Font FilterFont.txt
pause