1. 方法一
import numpy as np
# Example matrix with missing entries; every row is normalized independently.
# NaN is spelled np.nan because only `numpy as np` is imported (a bare `nan`
# would raise NameError).
A = np.array([[7, 4, 5, 7000],
              [1, 900, 9, np.nan],
              [5, -1000, np.nan, 100],
              [np.nan, np.nan, 3, 1000]])

# Compute NaN-aware row norms: the nan* reductions skip missing entries
# instead of propagating NaN into the result.
L1_norm = np.nansum(np.abs(A), axis=1)
L2_norm = np.sqrt(np.nansum(A**2, axis=1))
max_norm = np.nanmax(np.abs(A), axis=1)

# Normalize rows. [:, np.newaxis] reshapes each norm vector into a column so
# that it broadcasts across the entries of its row; NaN entries stay NaN.
# (Use A.values instead of A when A is a DataFrame.)
# NOTE: a row whose norm is 0 would trigger a division by zero here.
A_L1 = A / L1_norm[:, np.newaxis]
A_L2 = A / L2_norm[:, np.newaxis]
A_max = A / max_norm[:, np.newaxis]

# Check that it worked: every recomputed row norm should equal 1.
L1_norm_after = np.nansum(np.abs(A_L1), axis=1)
L2_norm_after = np.sqrt(np.nansum(A_L2**2, axis=1))
max_norm_after = np.nanmax(np.abs(A_max), axis=1)
In[182]: L1_norm_after
Out[182]: array([1., 1., 1., 1.])
In[183]: L2_norm_after
Out[183]: array([1., 1., 1., 1.])
In[184]: max_norm_after
Out[184]: array([1., 1., 1., 1.])
2. 方法二
from numpy import nan, nanmean
from sklearn.preprocessing import StandardScaler

# Column-wise standardization of a matrix that contains missing values (NaN).
scaler = StandardScaler()
A = [[7, 4, 5, 7000],
     [1, 900, 9, nan],
     [5, -1000, nan, 100],
     [nan, nan, 3, 1000]]
# fit() learns the per-column statistics. As the session output that follows
# shows, NaN entries are left out of mean_ and are kept as NaN by transform().
scaler.fit(A)
In [45]: scaler.mean_
Out[45]: array([4.33333333, -32., 5.66666667, 2700.])
In [46]: scaler.transform(A)
Out[46]: array([[ 1.06904497, 0.04638641, -0.26726124, 1.40399977],
[-1.33630621, 1.20089267, 1.33630621, nan],
[ 0.26726124, -1.24727908, nan, -0.84893009],
[ nan, nan, -1.06904497, -0.55506968]])
In [54]: nanmean(scaler.transform(A), axis=0)
Out[54]: array([ 1.48029737e-16, 0.00000000e+00, -1.48029737e-16,0.00000000e+00])
参考: