项目场景:
在一个DataFrame中,使用df.corr()计算各列之间的相关系数。
总共有8列,500行。
print(train_data_val.dtypes)
#每一列的dtype为object
问题描述:
报了以下错误:
AttributeError Traceback (most recent call last)
/tmp/ipykernel_159/3488124376.py in <module>
23 print(train_data_val['market1501_rank'])
24
---> 25 print(train_data_val['cplfw_rank'].corr(train_data_val['market1501_rank']))
26 print(train_data_val.corr())
27
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/pandas/core/series.py in corr(self, other, method, min_periods)
2333 if method in ["pearson", "spearman", "kendall"] or callable(method):
2334 return nanops.nancorr(
-> 2335 this.values, other.values, method=method, min_periods=min_periods
2336 )
2337
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/pandas/core/nanops.py in _f(*args, **kwargs)
69 try:
70 with np.errstate(invalid="ignore"):
---> 71 return f(*args, **kwargs)
72 except ValueError as e:
73 # we want to transform an object array
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/pandas/core/nanops.py in nancorr(a, b, method, min_periods)
1350
1351 f = get_corr_func(method)
-> 1352 return f(a, b)
1353
1354
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/pandas/core/nanops.py in func(a, b)
1371
1372 def func(a, b):
-> 1373 return np.corrcoef(a, b)[0, 1]
1374
1375 return func
<__array_function__ internals> in corrcoef(*args, **kwargs)
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/numpy/lib/function_base.py in corrcoef(x, y, rowvar, bias, ddof)
2549 warnings.warn('bias and ddof have no effect and are deprecated',
2550 DeprecationWarning, stacklevel=3)
-> 2551 c = cov(x, y, rowvar)
2552 try:
2553 d = diag(c)
<__array_function__ internals> in cov(*args, **kwargs)
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/numpy/lib/function_base.py in cov(m, y, rowvar, bias, ddof, fweights, aweights)
2454 w *= aweights
2455
-> 2456 avg, w_sum = average(X, axis=1, weights=w, returned=True)
2457 w_sum = w_sum[0]
2458
<__array_function__ internals> in average(*args, **kwargs)
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/numpy/lib/function_base.py in average(a, axis, weights, returned)
413
414 if returned:
--> 415 if scl.shape != avg.shape:
416 scl = np.broadcast_to(scl, avg.shape).copy()
417 return avg, scl
AttributeError: 'float' object has no attribute 'shape'
解决方案:
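原因:各列的dtype为object,NumPy对object数组求平均时得到的是普通的Python float(没有shape属性),因此报错。
将DataFrame数据转换为数值类型即可。建议使用float64;float16只有约3位有效数字且最大值为65504,容易损失精度或溢出。
train_data_val = train_data_val.astype('float64')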
便可以解决该问题。