# 1. Univariate feature selection
# Select data features using the chi-squared test
from pandas import read_csv
from numpy import set_printoptions
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
# Load the data set; the column labels are supplied explicitly through `names`.
# NOTE(review): passing `names` presumes the CSV has no header row -- confirm.
filename = 'pima_data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
data = read_csv(filename, names=names)
# Split into input features (first eight columns) and the target (ninth column).
array = data.values
X = array[:, 0:8]
Y = array[:, 8]
# Feature selection: score every input column with chi2 and keep the 4 best.
# NOTE(review): chi2 is documented to require non-negative feature values.
test = SelectKBest(score_func=chi2, k=4)
fit = test.fit(X, Y)
# Limit printed NumPy values to 3 decimal places.
set_printoptions(precision=3)
# print(...) with one parenthesised argument behaves the same on Python 2 and
# Python 3; the bare `print x` statement is a SyntaxError on Python 3.
print(fit.scores_)
# Reduce X to the selected columns only.
features = fit.transform(X)
print(features)