def feature_deal(data, *, null_threshold=0.97, text_min_len=50):
    """Clean a raw feature table in place and normalise its categorical columns.

    Steps, in order:

    1. Drop the ``itemCode`` column.
    2. Drop every column whose fraction of missing values is strictly
       greater than ``null_threshold``.
    3. Treat the remaining object-dtype columns as categorical, except the
       label columns (``defectName-1``, ``defectName-2``) and any column
       holding at least one string of ``text_min_len`` or more characters
       (considered free text).
    4. Print the list of categorical column names.
    5. Cast each categorical column to ``str`` and strip surrounding
       whitespace from its values.

    Args:
        data: pandas DataFrame to clean. It is modified in place.
        null_threshold: Columns with a missing-value ratio above this are
            dropped. Defaults to 0.97.
        text_min_len: Minimum string length that marks a column as free
            text. Defaults to 50.

    Returns:
        The same DataFrame object that was passed in.

    Raises:
        KeyError: If ``data`` has no ``itemCode`` column.
    """
    data.drop(columns=['itemCode'], inplace=True)
    label_columns = ['defectName-1', 'defectName-2']

    # Drop features whose share of missing values exceeds the threshold.
    null_ratio = data.isnull().sum() / len(data)
    sparse_columns = null_ratio[null_ratio > null_threshold].index.tolist()
    data.drop(columns=sparse_columns, inplace=True)

    def is_long_text(value):
        return isinstance(value, str) and len(value) >= text_min_len

    # Only object-dtype columns can end up categorical, so the free-text scan
    # is limited to those candidates instead of walking every column.
    object_columns = data.select_dtypes(include=['object']).columns.to_list()
    categorical_columns = [
        col for col in object_columns
        if col not in label_columns
        and not data[col].apply(is_long_text).any()
    ]
    print(categorical_columns)

    for col in categorical_columns:
        # Cast to str BEFORE stripping: the .str accessor yields NaN for
        # non-string elements, so stripping first would wipe out values such
        # as ints stored in a mixed object column.
        data[col] = data[col].astype('str').str.strip()
    return data
def feature_deal(data):
data.drop(columns=['itemCode'],inplace=True)
label_feature = ['defectName-1','defectName-2']
null_precentile = data.isnull().sum() / len(data)
dropcol_list1 = null_precentile[null_precentile.values > 0.97].index.tolist() #删除掉空值超过百分之97的特征
data.drop(columns=dropcol_list1,inplace=True)
text_columns = [col for col in data.columns if data[col].apply(lambda x: isinstance(x, str) and len(x) >= 50).any()] #筛出文本特征
categorical_columns = data.select_dtypes(include=['object']).columns.to_list()
dropcol_list2 = label_feature + text_columns
categorical_columns = [cat for cat in categorical_columns if cat not in dropcol_list2] #筛选类别特征
print(categorical_column
12-02
2478
![](https://csdnimg.cn/release/blogv2/dist/pc/img/readCountWhite.png)
04-29