import numpy as np
INPUT_PATH = "/home/zlf/Documents/e14601_data_set.csv"
OUTPUT_PATH = '/home/zlf/Documents/e14601_data_set_n0.csv'


def fill_zeros_with_column_mean(data):
    """Replace zeros in each feature column with that column's non-zero mean.

    Every column except the last is treated as a feature column; the last
    column is passed through untouched (presumably a label/target column --
    confirm against the data set).

    For each feature column:
      * if it contains no non-zero entry, the column is dropped entirely,
        since no mean can be computed for it;
      * otherwise each zero entry is replaced by the mean of the column's
        non-zero entries.

    Args:
        data: 2-D array-like of numbers (rows are samples). A 1-D input is
            treated as a single row.

    Returns:
        A new float ``numpy.ndarray`` with the surviving feature columns
        (zeros filled) followed by the untouched last column. The input is
        not modified.
    """
    # np.array copies by default, so the caller's array is never mutated.
    table = np.array(data, dtype=float, ndmin=2)
    n_features = max(table.shape[1] - 1, 0)
    features = table[:, :n_features]
    passthrough = table[:, n_features:]

    nonzero_counts = (features != 0).sum(axis=0)
    # Zeros contribute nothing, so the plain column sum equals the sum of
    # the non-zero entries.
    column_sums = features.sum(axis=0)

    # Select all droppable columns with one mask instead of deleting them
    # one by one while iterating: deleting in a forward loop shifts the
    # remaining indices and skips the column right after each deletion,
    # which would leave an all-zero column behind and cause a 0/0 division.
    keep = nonzero_counts > 0
    kept = features[:, keep]
    means = column_sums[keep] / nonzero_counts[keep]

    filled = np.where(kept == 0, means, kept)
    return np.hstack([filled, passthrough])


def main():
    """Load the CSV, impute zero feature values, and write the result."""
    # Passing the path lets numpy open and close the file itself;
    # ndmin=2 keeps a single-row file two-dimensional.
    e14601 = np.loadtxt(INPUT_PATH, delimiter=",", skiprows=0, ndmin=2)
    print(e14601.shape[0], e14601.shape[1])

    e14601 = fill_zeros_with_column_mean(e14601)
    print(e14601.shape[0], e14601.shape[1])

    np.savetxt(OUTPUT_PATH, e14601, delimiter=',')


if __name__ == "__main__":
    main()
# Replace missing feature values with the mean value.
# (Scraped page footer: "latest recommended article published 2023-05-26 05:41:02".)