# -*- coding: utf-8 -*-
# Section 1: PCA on the spreadsheet data.
import pandas as pd
from sklearn.decomposition import PCA  # was misspelled "decompositon"

# Raw string: '\U' in a normal literal is a unicode-escape SyntaxError in Py3.
df = pd.read_excel(r'C:\Users\Administrator\Desktop\111.xlsx')
# print(df)

# Optional normalizations, kept for reference:
# a = (df - df.min()) / (df.max() - df.min())  # min-max normalization
# b = (df - df.mean()) / df.std()              # zero-mean (z-score) normalization

pca = PCA()
pca.fit(df)
c = pca.components_                 # principal axes in feature space
d = pca.explained_variance_ratio_   # fraction of variance per component
print(c)  # was mangled to "print©" by a word processor
print(d)
######################
# Section 2: exploratory look at the dataset (missing values, distributions).
data = pd.read_excel(r'C:\Users\Administrator\Desktop\test.xlsx')
data.info()
a = data.corr().head()  # first rows of the correlation matrix
b = data.columns
print(a)
print(b)
data.isnull().sum()  # per-column missing-value counts (result not printed)
c = data['nkill'].value_counts().head(11)  # counts of values in 'nkill'
print(c)  # was mangled to "print©"
print(data.groupby('weapsubtype1').size())  # distribution by weapon subtype
#########
a = data.dropna(subset=['nkill'])  # drop rows missing 'nkill'
print(a)
b = a.head()
###########
# Section 3: encode the string column 'corp1' as integer labels and export.
from sklearn import preprocessing  # was used below but never imported

df = pd.read_excel(r'C:\Users\Administrator\Desktop\test.xlsx')
le = preprocessing.LabelEncoder()
le.fit(df["corp1"])
df["corp1"] = le.transform(df["corp1"])  # convert string labels to integer codes
print(df)
df.to_csv('a.csv')  # write result out in CSV format