最近想对Excel表格中的数据进行归一化,但用Excel或SPSS操作比较麻烦,所以写了一个Python脚本,处理起来更加方便。
数据导入:
#导入数据
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Earlier variant kept for reference (first 11 columns of another file).
# NOTE(review): a .csv path would normally be read with pd.read_csv -- confirm before reviving.
# df = pd.read_excel('D:\\零时文件\\data.csv',usecols=[0,1,2,3,4,5,6,7,8,9,10])

# Load the sampled-results workbook into a DataFrame.
df = pd.read_excel(
    io='D:\\Datasets\\geoDetector\\采样结果\\最终版本5000m.xlsx',
)
最值归一化:
# Min-max normalization: rescale every column of df to MinMaxScaler's
# default feature range of [0, 1].
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
# fit_transform learns each column's min/max and rescales in one call;
# `scaler` stays fitted, so scaler.inverse_transform() is still available.
scaled_features = scaler.fit_transform(df)
# Generate the X1..Xn labels from the real column count. The previous
# hard-coded list of 27 names raised ValueError for any other table width.
# Reusing df.index keeps the output rows aligned with the source rows.
df_MinMax = pd.DataFrame(
    data=scaled_features,
    columns=[f"X{i}" for i in range(1, scaled_features.shape[1] + 1)],
    index=df.index,
)
保存数据:
# Persist the min-max normalized table as a CSV file.
df_MinMax.to_csv(path_or_buf='D:\\零时文件\\最终版本5000m最值归一化.csv')
均值方差归一化(Z-score 标准化):
# Z-score standardization: StandardScaler centers each column of df on its
# mean and scales it to unit variance (its default settings).
from sklearn.preprocessing import StandardScaler
# Keep the fitted scaler under its own name. Previously `sc_X` was rebound
# from scaler to array to DataFrame, which threw the scaler away and made
# inverse_transform() impossible.
standard_scaler = StandardScaler()
standardized = standard_scaler.fit_transform(df)
# Generate the X1..Xn labels from the real column count. The previous
# hard-coded list of 27 names raised ValueError for any other table width.
# Reusing df.index keeps the output rows aligned with the source rows.
sc_X = pd.DataFrame(
    data=standardized,
    columns=[f"X{i}" for i in range(1, standardized.shape[1] + 1)],
    index=df.index,
)
保存数据:
# Persist the standardized table as a CSV file.
sc_X.to_csv(path_or_buf='D:\\零时文件\\最终版本5000m均值归一化.csv')