数据预处理-Sklearn实现归一化与标准化并plot
数据集是 UCI Wine 数据集:https://archive.ics.uci.edu/ml/datasets/wine (这里只选前三列:类别标签 Class label、Alcohol、Malic acid)
完整代码
import pandas as pd
import numpy as np
# Load the wine data: header=None keeps the first row from being used as
# column names, and usecols restricts the read to the first three columns.
df = pd.read_csv(
    'wine_data.csv',
    header=None,
    usecols=[0, 1, 2],
)
# Assign explicit column names.
df.columns = [
    'Class label',
    'Alcohol',
    'Malic acid',
]
from sklearn import preprocessing
# The two feature columns that get rescaled.
_features = df[['Alcohol', 'Malic acid']]

# Standardization: fit a StandardScaler on the features, then transform them.
std_scale = preprocessing.StandardScaler()
std_scale.fit(_features)
df_std = std_scale.transform(_features)

# Normalization: fit a MinMaxScaler on the features, then transform them.
minmax_scale = preprocessing.MinMaxScaler()
minmax_scale.fit(_features)
df_minmax = minmax_scale.transform(_features)
# %matplotlib inline
import matplotlib.pyplot as plt
def plot():
    """Scatter-plot the raw, standardized and min-max scaled wine features.

    Reads the module-level ``df``, ``df_std`` and ``df_minmax`` and draws
    each as a scatter series (first feature on x, second on y) on a single
    figure created with ``figsize=(8, 6)``, then calls ``plt.show()``.
    """
    plt.figure(figsize=(8, 6))

    # Three series on the same axes: raw input, standardized, min-max scaled.
    plt.scatter(
        df['Alcohol'], df['Malic acid'],
        color='green', label='input scale', alpha=0.6,
    )
    # Math symbols are written as $\...$. The label is a raw string because
    # "\m" and "\s" are invalid escape sequences in a normal string literal.
    plt.scatter(
        df_std[:, 0], df_std[:, 1],
        color='red',
        label=r'Z-Score scaled[ $N ( \mu=0, \sigma=1 ) $ ]',
        alpha=0.3,
    )
    plt.scatter(
        df_minmax[:, 0], df_minmax[:, 1],
        color='b', label='Min-Max scaled [min=0, max=1]', alpha=0.3,
    )

    # plt.title('Alcohol and Malic Acid content of the wine dataset')
    plt.xlabel('Alcohol', fontsize=13, fontweight='bold')
    plt.ylabel('Malic Acid', fontsize=13, fontweight='bold')
    # Set tick-label font, size and bold weight.
    plt.yticks(fontproperties='Times New Roman', size=13, weight='bold')
    plt.xticks(fontproperties='Times New Roman', size=13, weight='bold')
    plt.legend(loc='best')
    # plt.grid()
    # Automatically adjust layout parameters so the plot fills the figure area.
    plt.tight_layout()
    plt.show()
# Build the comparison scatter plot.
plot()
# Display the figure.
# NOTE(review): if plot() already calls plt.show(), this second call is
# redundant — confirm before removing.
plt.show()
运行
效果如图:
参考链接:
[1]: https://blog.csdn.net/sanjianjixiang/article/details/100807176