decision tree:
# -*- coding: utf-8 -*-
import sklearn
from sklearn import tree
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import datasets
import pandas as pd
import numpy
def getData_1():
    """Load the iris dataset into a pandas DataFrame.

    Returns:
        A DataFrame whose first four columns are the features
        'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm' and
        'PetalWidthCm', followed by a 'target' column holding the class
        label of each sample.
    """
    iris = datasets.load_iris()
    # Feature matrix: one row per sample, four measurements per sample
    # (150 x 4 for iris).
    X = iris.data
    # Class labels: one entry per sample.
    y = iris.target
    df1 = pd.DataFrame(
        X,
        columns=['SepalLengthCm', 'SepalWidthCm',
                 'PetalLengthCm', 'PetalWidthCm'],
    )
    df1['target'] = y
    return df1
# Build the iris DataFrame and hold out 30% of the rows for testing.
df = getData_1()
# Columns 0-3 are the four feature columns and column 4 is 'target'; a 0:3
# slice would leave 'PetalWidthCm' out of the training features.
X_train, X_test, y_train, y_test = train_test_split(
    df.iloc[:, 0:4], df['target'], test_size=0.3, random_state=42)
# One single-argument print per object behaves the same under the Python 2
# print statement and the Python 3 print function.
for part in (X_train, X_test, y_train, y_test):
    print(part)

# Tree split on Gini impurity (labelled "cart" in the output below).
# random_state is fixed so repeated runs report the same scores.
model = tree.DecisionTreeClassifier(criterion='gini', random_state=42)
model.fit(X_train, y_train)

# Tree split on entropy (information gain). NOTE(review): the output labels
# this "c4.5", but scikit-learn documents its trees as CART-based, so only
# the split criterion differs between the two models - confirm the label.
model2 = tree.DecisionTreeClassifier(criterion='entropy', random_state=42)
model2.fit(X_train, y_train)

# For a classifier, score() is the mean accuracy on the held-out data.
print('cart树:{:.3f}'.format(model.score(X_test, y_test)))
print('c4.5树::{:.3f}'.format(model2.score(X_test, y_test)))
结果:输出的准确度
LinearRegression:
# -*- coding: utf-8 -*-
import sklearn
# make_classification is exported by sklearn.datasets itself; the
# sklearn.datasets.samples_generator module path was removed in
# scikit-learn 0.24.
from sklearn.datasets import make_classification
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Synthetic two-class data set: 2400 samples with 5 features each.
X, y = make_classification(n_samples=2400, n_features=5, n_informative=2,
                           n_redundant=2, n_classes=2,
                           n_clusters_per_class=2, scale=1.0,
                           random_state=20)
# Hold out 30% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42)

# `normalize` is intentionally not passed: it was deprecated in scikit-learn
# 1.0 and removed in 1.2, where passing it raises TypeError. Its default was
# False, so omitting it keeps the same model on older releases.
model = LinearRegression(fit_intercept=True, copy_X=True, n_jobs=1)
model.fit(X_train, y_train)
print('FINISH')

# For a regressor score() is R^2 (for a classifier it would be accuracy);
# here a linear regression is fitted to the 0/1 class labels.
print(model.score(X_train, y_train))
print(model.score(X_test, y_test))

# One single-argument print per object behaves the same under the Python 2
# print statement and the Python 3 print function.
for part in (X_train, y_train, X_test, y_test):
    print(part)