需求:
得到:将原本分为43类的数据集,转化为只分为4类的数据集
先生成结果1:merge.txt(类别为文字,如 prohibitory)
再生成结果2:merge_figure.txt(类别为数字,如 0)
拥有:原本分为43类的标注文件 gt.txt,以及43类和4类的对应关系文件 class_mapping.txt
代码部分
先将class_mapping.txt处理为一个字典
def read_c_m(path):
    """Parse a class-mapping file into a ``{class_id: category}`` dict.

    Every non-blank line is expected to look like
    ``<id> = <description> (<category>)``. The key is the text before the
    first ``" ="``. The value is the text after the last ``"("`` with every
    ``")"`` removed, terminated by a newline character so that it can be
    written to an output file as the end of a record.

    Args:
        path: Path of the mapping file to read.

    Returns:
        A dict mapping each id string to its newline-terminated category
        name (e.g. "prohibitory" followed by a newline).

    Raises:
        ValueError: If a non-blank line contains no ``" ="`` or no ``"("``.
    """
    class_mapping_dict = {}
    # The context manager closes the file even when a malformed line raises.
    with open(path) as txt:
        for line in txt:
            # Remove only the line terminator. Slicing off the last character
            # unconditionally would eat real data on a final line that has no
            # trailing newline.
            line = line.rstrip("\n")
            if not line.strip():
                continue  # blank lines carry no mapping
            key = line[:line.index(" =")]
            # rindex searches from the right, so this is the last "(" on the
            # line; replace() then drops the closing ")".
            value = line[line.rindex("(") + 1:].replace(")", "") + "\n"
            class_mapping_dict[key] = value
    return class_mapping_dict
# Build the class-id -> category lookup from class_mapping.txt
# (the path is resolved relative to the current working directory).
cm = read_c_m("class_mapping.txt")
对 gt.txt 的每一行:以最后一个分号之后的部分作为键,在上面的字典中取出对应的值,再把最后一个分号之前的部分(含分号)与该值拼接,生成第一个结果 merge.txt
def modify(path, cm, out_path="merge.txt"):
    """Replace the trailing class id of every record with its category.

    Each non-blank line of ``path`` is split at its last ``";"``. The text
    after it (without the newline) is looked up in ``cm``, and the text up
    to and including the ``";"`` is written followed by the looked-up value.

    Args:
        path: Path of the semicolon-separated annotation file to read.
        cm: Mapping from class-id string to the replacement text. Values
            are written verbatim, so they must already end with a newline.
        out_path: File the converted records are appended to. Defaults to
            ``"merge.txt"`` in the current working directory.

    Raises:
        ValueError: If a non-blank line contains no ``";"``.
        KeyError: If a line's class id is not a key of ``cm``.
    """
    # Both handles are closed even if a bad record raises part-way through.
    # Append mode is kept on purpose: an existing output file is extended,
    # not truncated.
    with open(path) as txt, open(out_path, "a") as f:
        for line in txt:
            if not line.strip():
                continue  # tolerate blank lines instead of crashing
            cut = line.rindex(";") + 1
            key = line[cut:].replace("\n", "")
            f.write(line[:cut] + cm[key])
# Convert gt.txt using the lookup built above; modify() appends its
# output to merge.txt.
modify("gt.txt",cm)
将每行最后的文字类别对应到数字类别(prohibitory→0,danger→1,mandatory→2,other→3)
def modify_figure(path, out_path="merge_figure.txt"):
    """Replace the trailing category name of every record with a digit.

    Each non-blank line of ``path`` is split at its last ``";"``. The text
    after it (without the newline) is translated as
    ``prohibitory -> 0``, ``danger -> 1``, ``mandatory -> 2``,
    ``other -> 3`` and the record is written with that digit in place of
    the name. A name outside this table yields an empty label and is not
    counted. After all lines are processed, a one-line summary of the
    per-label counts is printed.

    Args:
        path: Path of the semicolon-separated file whose last field is a
            category name.
        out_path: File the converted records are appended to. Defaults to
            ``"merge_figure.txt"`` in the current working directory.

    Raises:
        ValueError: If a non-blank line contains no ``";"``.
    """
    # Category name -> numeric label written to the output file.
    labels = {
        "prohibitory": "0",
        "danger": "1",
        "mandatory": "2",
        "other": "3",
    }
    counts = dict.fromkeys(labels.values(), 0)

    # Both handles are closed even if a bad record raises part-way through.
    # Append mode is kept on purpose: an existing output file is extended.
    with open(path) as txt, open(out_path, "a") as f:
        for line in txt:
            if not line.strip():
                continue  # tolerate blank lines instead of crashing
            cut = line.rindex(";") + 1
            key = line[cut:].replace("\n", "")
            label = labels.get(key, "")
            if label:
                counts[label] += 1
            f.write(line[:cut] + label + "\n")

    print(
        "count0={},count1={},count2={},count3={}".format(
            counts["0"], counts["1"], counts["2"], counts["3"]
        )
    )
    # Sample output recorded by the original author:
    # count0=557,count1=219,count2=163,count3=274
最终生成结果2:merge_figure.txt