机器学习10大经典算法 python实现

这里列举了10种常用的机器学习算法,主要是调用sklearn库的算法。另外,我自己也实现了其中的部分算法,会在之后附录在相应算法旁边,待更新。
1、svm(支持向量机):

from sklearn.datasets import load_iris 
import  numpy as np
from sklearn.metrics import accuracy_score
from sklearn import svm
from sklearn.model_selection import train_test_split
if __name__ == '__main__':
    # Evaluate an RBF-kernel SVM on iris over 10 random 80/20 splits and
    # report per-split and average test accuracy.
    iris = load_iris()  # load once instead of twice per iteration
    allPre = []
    for i in range(1, 11):
        train_X, test_X, train_y, test_y = train_test_split(iris.data,
                                                            iris.target,
                                                            test_size=0.2,
                                                            random_state=i)
        # ovr: one-vs-rest decision function for the 3-class problem
        clf = svm.SVC(C=0.8, kernel='rbf', gamma=20, decision_function_shape='ovr')
        clf.fit(train_X, train_y)
        preY = clf.predict(test_X)
        # accuracy_score signature is (y_true, y_pred); accuracy itself is
        # symmetric, but keeping the documented order avoids surprises if
        # the metric is ever swapped for a non-symmetric one.
        allPre.append(accuracy_score(test_y, preY))
    print('SVM')
    for i, acc in enumerate(allPre, start=1):
        print('The test:', i, ', Accuracy is %.2f' % acc)
    print('The average is %.2f' % np.mean(allPre))

2、LR(逻辑回归):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import  numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
if '__main__' == __name__:
    # Evaluate L2-regularized logistic regression on iris over 10 random
    # 80/20 splits and report per-split and average test accuracy.
    iris = load_iris()  # load once instead of twice per iteration
    allPre = []
    for i in range(1, 11):
        train_X, test_X, train_y, test_y = train_test_split(iris.data,
                                                            iris.target,
                                                            test_size=0.2,
                                                            random_state=i)
        # penalty='l2' is the default ridge-style regularization
        model = LogisticRegression(penalty='l2')
        model.fit(train_X, train_y)
        preY = model.predict(test_X)
        # accuracy_score expects (y_true, y_pred)
        allPre.append(accuracy_score(test_y, preY))
    print('LR')
    for i, acc in enumerate(allPre, start=1):
        print('The test:', i, ', Accuracy is %.2f' % acc)
    print('The average is %.2f' % np.mean(allPre))

3、LDA(线性判别分析):

import numpy as np
from sklearn.datasets import load_iris
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

if '__main__' == __name__:
    # Evaluate Linear Discriminant Analysis (as a classifier) on iris over
    # 10 random 80/20 splits and report per-split and average test accuracy.
    iris = load_iris()  # load once instead of twice per iteration
    allPre = []
    for i in range(1, 11):
        train_X, test_X, train_y, test_y = train_test_split(iris.data,
                                                            iris.target,
                                                            test_size=0.2,
                                                            random_state=i)
        # n_components only affects transform(); prediction uses the full model
        lda = LinearDiscriminantAnalysis(n_components=2)
        lda.fit(train_X, train_y)
        # (the original also computed lda.transform(train_X) into an unused
        # variable; that dead work is removed here)
        preY = lda.predict(test_X)
        # accuracy_score expects (y_true, y_pred)
        allPre.append(accuracy_score(test_y, preY))
    print('LDA')
    for i, acc in enumerate(allPre, start=1):
        print('The test:', i, ', Accuracy is %.2f' % acc)
    print('The average is %.2f' % np.mean(allPre))

4、KNN(最近邻算法):
KNN 手写数字识别

from sklearn.datasets import load_iris 
from sklearn.model_selection import train_test_split
import  numpy as np
from sklearn.metrics import accuracy_score
from collections import Counter
from sklearn.model_selection import train_test_split
#下载iris鸢尾花数据集
# 和切分数据集为训练集和测试集
# 训练集:trainX、trainY;
# 测试集:testX、testY
#KNN算法实现
#testX为测试样本集
# k为邻近的样本点数量
def KNN(trainX, trainY, testX, k=5):
    """Classify each row of testX by majority vote among its k nearest
    neighbors in trainX under Euclidean distance.

    trainX: training samples; trainY: their labels; testX: samples to
    classify; k: number of neighbors consulted. Returns a list with one
    predicted label per row of testX.
    """
    predictions = []
    for sample in testX:
        # Euclidean distance from this test sample to every training sample
        dists = [np.sqrt(np.sum((ref - sample) ** 2)) for ref in trainX]
        # indices of the k closest training samples, then their labels
        nearest_labels = [trainY[j] for j in np.argsort(dists)[:k]]
        # the most frequent label among the neighbors wins
        winner = Counter(nearest_labels).most_common(1)[0][0]
        predictions.append(winner)
    return predictions
if __name__ == '__main__':
    # Evaluate the hand-rolled KNN classifier on iris over 10 random 80/20
    # splits and report per-split and average test accuracy.
    iris = load_iris()  # load once instead of twice per iteration
    allPre = []
    for i in range(1, 11):
        train_X, test_X, train_y, test_y = train_test_split(iris.data,
                                                            iris.target,
                                                            test_size=0.2,
                                                            random_state=i)
        preY = KNN(train_X, train_y, test_X)
        # accuracy_score expects (y_true, y_pred)
        allPre.append(accuracy_score(test_y, preY))
    print('KNN')
    for i, acc in enumerate(allPre, start=1):
        print('The test:', i, ', Accuracy is %.2f' % acc)
    print('The average is %.2f' % np.mean(allPre))

5、决策树
分类决策树
回归决策树

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier as DTC
import  numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
if '__main__' == __name__:
    # Evaluate a decision-tree classifier on iris over 10 random 80/20
    # splits and report per-split and average test accuracy.
    iris = load_iris()  # load once instead of twice per iteration
    allPre = []
    for i in range(1, 11):
        train_X, test_X, train_y, test_y = train_test_split(iris.data,
                                                            iris.target,
                                                            test_size=0.2,
                                                            random_state=i)
        model = DTC()
        model.fit(train_X, train_y)
        preY = model.predict(test_X)
        # accuracy_score expects (y_true, y_pred)
        allPre.append(accuracy_score(test_y, preY))
    # was mislabeled 'LR' (copy-paste from the logistic-regression script)
    print('Decision Tree')
    for i, acc in enumerate(allPre, start=1):
        print('The test:', i, ', Accuracy is %.2f' % acc)
    print('The average is %.2f' % np.mean(allPre))

7、AdaBoost(集成学习):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier as ADA
import  numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
if '__main__' == __name__:
    # Evaluate an AdaBoost ensemble classifier on iris over 10 random 80/20
    # splits and report per-split and average test accuracy.
    iris = load_iris()  # load once instead of twice per iteration
    allPre = []
    for i in range(1, 11):
        train_X, test_X, train_y, test_y = train_test_split(iris.data,
                                                            iris.target,
                                                            test_size=0.2,
                                                            random_state=i)
        model = ADA()
        model.fit(train_X, train_y)
        preY = model.predict(test_X)
        # accuracy_score expects (y_true, y_pred)
        allPre.append(accuracy_score(test_y, preY))
    # was mislabeled 'LR' (copy-paste from the logistic-regression script)
    print('AdaBoost')
    for i, acc in enumerate(allPre, start=1):
        print('The test:', i, ', Accuracy is %.2f' % acc)
    print('The average is %.2f' % np.mean(allPre))

8、Naive bayes(朴素贝叶斯)

from sklearn.datasets import load_iris 
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.model_selection import train_test_split
if __name__ == '__main__':
    # Evaluate a multinomial naive-Bayes classifier on iris over 10 random
    # 80/20 splits and report per-split and average test accuracy.
    # NOTE(review): MultinomialNB is intended for count features; it runs on
    # iris only because the measurements are non-negative — GaussianNB would
    # be the natural fit for continuous features. Kept as-is.
    iris = load_iris()  # load once instead of twice per iteration
    allPre = []
    for i in range(1, 11):
        train_X, test_X, train_y, test_y = train_test_split(iris.data,
                                                            iris.target,
                                                            test_size=0.2,
                                                            random_state=i)
        clf = MultinomialNB()
        clf = clf.fit(train_X, train_y)
        preY = clf.predict(test_X)
        # accuracy_score expects (y_true, y_pred)
        allPre.append(accuracy_score(test_y, preY))
    print('NB')
    for i, acc in enumerate(allPre, start=1):
        print('The test:', i, ', Accuracy is %.2f' % acc)
    print('The average is %.2f' % np.mean(allPre))

9、Kmeans(K均值聚类):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn import cluster
import  numpy as np
from sklearn.metrics import accuracy_score
if '__main__' == __name__:
    # Cluster the iris training split with K-means and score the raw cluster
    # IDs against the true labels on the test split, over 10 random splits.
    # NOTE(review): K-means cluster IDs are arbitrary and not aligned with
    # the true class labels, so accuracy_score on them is only meaningful by
    # coincidence; a label-matching step or a clustering metric such as
    # adjusted_rand_score would be more appropriate — confirm intent.
    iris = load_iris()  # load once instead of twice per iteration
    allPre = []
    for i in range(1, 11):
        train_X, test_X, train_y, test_y = train_test_split(iris.data,
                                                            iris.target,
                                                            test_size=0.2,
                                                            random_state=i)
        # iris has 3 classes, so cluster into 3 groups (was 4)
        model = cluster.KMeans(init='k-means++', n_clusters=3, random_state=8)
        # KMeans is unsupervised; the y argument the original passed was
        # silently ignored, so drop it
        model.fit(train_X)
        preY = model.predict(test_X)
        allPre.append(accuracy_score(test_y, preY))
    # was mislabeled 'LR' (copy-paste from the logistic-regression script)
    print('KMeans')
    for i, acc in enumerate(allPre, start=1):
        print('The test:', i, ', Accuracy is %.2f' % acc)
    print('The average is %.2f' % np.mean(allPre))

10、PCA(主成分分析):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import  numpy as np
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
if '__main__' == __name__:
    # Reduce iris to the components explaining 95% of variance with PCA,
    # then classify in the reduced space with KNN, over 10 random splits.
    iris = load_iris()  # load once instead of twice per iteration
    allPre = []
    for i in range(1, 11):
        train_x, test_x, train_y, test_y = train_test_split(iris.data,
                                                            iris.target,
                                                            test_size=0.2,
                                                            random_state=i)
        # float n_components: keep enough components for 95% explained variance
        model = PCA(n_components=0.95)
        # Fit PCA on the training data only. The original refit on the test
        # set (fit_transform(test_x)), which leaks test information and
        # projects train and test into different, incompatible spaces; it
        # also passed test_x as the (ignored) y argument of fit().
        pca_train_x = model.fit_transform(train_x)
        pca_test_x = model.transform(test_x)
        # PCA only reduces dimensionality; KNN does the actual classification
        knn = KNeighborsClassifier()
        knn.fit(pca_train_x, train_y)
        preY = knn.predict(pca_test_x)
        # accuracy_score expects (y_true, y_pred)
        allPre.append(accuracy_score(test_y, preY))
    # was mislabeled 'LR' (copy-paste from the logistic-regression script)
    print('PCA + KNN')
    for i, acc in enumerate(allPre, start=1):
        print('The test:', i, ', Accuracy is %.2f' % acc)
    print('The average is %.2f' % np.mean(allPre))
  • 1
    点赞
  • 18
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值