第十章 预测性分析和机器学习
监督学习
无监督学习
强化学习
1 scikit-learn
略
2 预处理
import numpy as np
from sklearn import preprocessing
from scipy.stats import anderson

# Load the rainfall data; raw values are tenths of a unit, so scale by 0.1.
rain = np.load('rain.npy')
rain = .1 * rain
# Negative values mark "trace" rainfall; replace them with half the
# measurement resolution (0.025) instead of a negative amount.
rain[rain < 0] = .05 / 2

# Mean, variance, and the Anderson-Darling normality test on the raw data.
print("Rain mean", rain.mean())
print("Rain Variance", rain.var())
print("Anderson Rain", anderson(rain))

# Standardize the feature to zero mean and unit variance.
scaled = preprocessing.scale(rain)
print("Scaled mean", scaled.mean())
print("Scaled Variance", scaled.var())
print("Anderson Scaled", anderson(scaled))

# Convert the numeric feature into boolean (0/1) values.
# NOTE: modern scikit-learn requires a 2-D input here, hence the reshape;
# passing the 1-D array directly raises a ValueError since sklearn 0.19.
binarized = preprocessing.binarize(rain.reshape(-1, 1))
print("binarized", np.unique(binarized), binarized.sum())

# Label-binarize the integer class labels (integers in the 0-62 range).
lb = preprocessing.LabelBinarizer()
lb.fit(rain.astype(int))
print(lb.classes_)
运行结果如下:
Rain mean 2.17919594267
Rain Variance 18.803443919
Anderson Rain AndersonResult(statistic=inf,critical_values=array([ 0.576, 0.656, 0.787, 0.918, 1.092]), significance_level=array([ 15. , 10. , 5. , 2.5, 1. ]))
Scaled mean 3.41301602808e-17
Scaled Variance 1.0
Anderson Scaled AndersonResult(statistic=inf, critical_values=array([ 0.576, 0.656, 0.787, 0.918, 1.092]), significance_level=array([ 15., 10. , 5. , 2.5, 1. ]))
binarized [ 0. 1.] 24594.0
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 42 43 44 45 46 47 48
 49 50 52 53 55 58 61 62]
3 基于逻辑回归的分类
该算法可以用于预测事件发生的概率,或是事物是否属于某一类别的概率。
from sklearn.linear_model import LogisticRegression
# sklearn.cross_validation was removed in scikit-learn 0.20;
# KFold now lives in sklearn.model_selection with a new API.
from sklearn.model_selection import KFold
from sklearn import datasets
import numpy as np


def classify(x, y):
    """Classify with logistic regression and print the mean
    10-fold cross-validation accuracy."""
    clf = LogisticRegression(random_state=12)
    scores = []
    # k-fold cross-validation: n_splits replaces the old n_folds
    # argument, and the fold indices come from kf.split(x).
    kf = KFold(n_splits=10)
    # Check the accuracy of the classifier on each fold.
    for train, test in kf.split(x):
        clf.fit(x[train], y[train])
        scores.append(clf.score(x[test], y[test]))
    print(np.mean(scores))


# Load the rainfall and day-of-year data.
rain = np.load('rain.npy')
dates = np.load('doy.npy')

# Build the feature matrix from dates and rainfall amounts.
x = np.vstack((dates[:-1], rain[:-1]))
# Target: sign of the next day's rainfall (no rain / trace / rain).
y = np.sign(rain[1:])
classify(x.T, y)

iris = datasets.load_iris()
x = iris.data[:, :2]
y = iris.target
classify(x, y)
运行结果如下:
0.576726256477
0.413333333333
4 基于支持向量机的分类
支持向量机(Support Vector Machine,SVM)
支持向量回归(Support Vector Regression,SVR)
可以用来进行回归分析,也可以用来分类
示例代码如下:
from sklearn.svm