# 1. Exploratory Data Analysis (EDA)
import numpy as np
import pandas as pd
# Load the data.
file_path = "test.csv"
demo_df = pd.read_csv(file_path)

# Name of the column treated as the target throughout this script.
# NOTE(review): the original snippet spelled this three different ways
# ('target_fea', 'target', 'targe_fe'); they are unified here on the most
# frequent spelling. Confirm it matches the actual header in the CSV.
target_col = "target"

# NOTE: the bare expressions below only display their result in an
# interactive session / notebook cell; they print nothing when run as a script.

# Peek at the first few rows.
demo_df.head()
# Summary statistics of the columns.
demo_df.describe()
# Pairwise correlation between columns. method='pearson' is the default;
# for a comparison of the three available coefficients
# (pearson / kendall / spearman) see:
# https://link.csdn.net/?target=https%3A%2F%2Fwww.biaodianfu.com%2Fpearson-kendall-spearman.html
# Non-numeric columns make DataFrame.corr() raise on pandas >= 2.0, so restrict
# to numeric/bool columns first (works on older pandas versions as well).
numeric_df = demo_df.select_dtypes(include=["number", "bool"])
# Computed once and reused for the per-target lookup below.
corr_matrix = numeric_df.corr()
corr_matrix
# Correlation of every numeric column with the target.
corr_matrix[target_col]
# Distinct values taken by the target.
demo_df[target_col].unique()
# Plot the target values (default plot kind).
demo_df[[target_col]].plot()
# Distribution of the values: how often each distinct value occurs.
demo_df[target_col].value_counts()
# Density curve of the target (pandas needs SciPy for kind='density').
demo_df[[target_col]].plot(kind='density')