1、查看原始的 label 文件可以发现:有的列标签(表头)不止占一行,这样的标签在后期做训练时会出错;另外还有一些列对模型判断没有作用,因此也一并删除。
标签文件名
代码
import pandas as pd
import os
# Batch-clean the raw CVD label CSVs: drop the unused columns, give the
# remaining columns fixed names, remove the two leftover header rows and
# write one '<glsId>_cvdThick.csv' per input file.
lineData = []  # not used by the code visible here; kept in case later code relies on it
dirFile = 'C:/Users/S/Desktop/B4VM/data/Label/RawCvdLabel'
cleanDir = 'C:/Users/S/Desktop/B4VM/data/Label/2CleanCvdLabel/'

# Final column names of a cleaned label file.
CVD_COLUMNS = ['Probe', 'Area', 'X', 'Sub X', 'Y', 'Z',
               'Lay1 Thickness', 'Lay2 Thickness']
# Columns removed from the 15-column raw layout (everything pandas
# auto-named 'Unnamed: 7' .. 'Unnamed: 14' except 'Unnamed: 9').
CVD_UNUSED_COLUMNS = ['Unnamed: 7', 'Unnamed: 8', 'Unnamed: 10', 'Unnamed: 11',
                      'Unnamed: 12', 'Unnamed: 13', 'Unnamed: 14']


def parse_gls_id(fileName):
    """Return the id field of an underscore-separated raw label file name.

    Names with exactly seven '_'-separated parts carry the id in part 4
    (0-based); every other name carries it in part 3.

    Returns:
        The id string, or None when the name has fewer than four parts and
        therefore has no part 3 (such names used to raise IndexError).
    """
    parts = fileName.split('_')
    if len(parts) == 7:
        return parts[4]
    if len(parts) > 3:
        return parts[3]
    return None


def clean_cvd_thick(rawFrame):
    """Return a cleaned copy of one raw CVD label table.

    Steps: drop CVD_UNUSED_COLUMNS when the table has 15 columns, rename
    the columns to CVD_COLUMNS, drop the rows labelled 0 and 1 and
    renumber the remaining rows from 0. The input frame is not modified.

    Raises:
        ValueError: if the table does not have exactly 8 columns at the
            renaming step (pandas length-mismatch error).
        KeyError: if the row labels 0 and 1 are not both present.
    """
    if rawFrame.shape[1] == 15:
        frame = rawFrame.drop(CVD_UNUSED_COLUMNS, axis=1)
    else:
        # Copy so that renaming the columns does not touch the caller's frame.
        frame = rawFrame.copy()
    # NOTE(review): the original indentation was lost; this assumes only the
    # column drop above is conditional and the renaming applies to every
    # file -- confirm against the original script.
    frame.columns = CVD_COLUMNS
    # Rows 0 and 1 presumably hold the continuation lines of the multi-line
    # header -- TODO confirm against a raw file.
    return frame.drop([0, 1]).reset_index(drop=True)


for root, dirs, files in os.walk(dirFile):
    for file in files:
        path = os.path.join(root, file)
        glsId = parse_gls_id(file)
        if glsId is None:
            # Stray files in the tree must not abort the whole batch.
            print('skipped, unexpected file name: ' + path)
            continue
        cvdThick = clean_cvd_thick(pd.read_csv(path, encoding='utf8'))
        name = glsId + '_cvdThick' + '.csv'
        # Create the output folder on first use instead of failing in to_csv.
        os.makedirs(cleanDir, exist_ok=True)
        # utf_8_sig writes a BOM at the start of the file.
        cvdThick.to_csv(os.path.join(cleanDir, name), index=False,
                        encoding='utf_8_sig')
#输出