python机器学习3-5代码

最新推荐文章于 2020-12-03 10:07:01 发布

llx1026

最新推荐文章于 2020-12-03 10:07:01 发布

阅读量543

点赞数

分类专栏： python机器学习-预测分析核心算法

本文链接：https://blog.csdn.net/llx1026/article/details/77856183

版权

python机器学习-预测分析核心算法专栏收录该内容

10 篇文章 1 订阅

订阅专栏

import urllib.request
import numpy
from sklearn import datasets, linear_model
from math import sqrt
import matplotlib.pyplot as plt

# Semicolon-delimited CSV: the first line holds the column names, and the last
# field of every following line is the label to be predicted.
target_url = ("http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv")

xList = []   # one list of float attribute values per data row
labels = []  # last field of each data row, converted to float
names = []   # header fields, kept as raw bytes exactly as downloaded

# The response yields bytes lines; float() accepts bytes directly, so no
# decoding is needed.  The `with` block guarantees the HTTP connection is
# closed even if parsing raises part-way through.
with urllib.request.urlopen(target_url) as data:
    for line_number, line in enumerate(data):
        stripped = line.strip()
        if line_number == 0:
            names = stripped.split(b";")
            continue
        if not stripped:
            # Skip blank lines (e.g. a trailing newline) rather than letting
            # float(b"") raise ValueError.
            continue
        # Everything but the last field is an attribute; the last is the label.
        *features, label = stripped.split(b";")
        labels.append(float(label))
        xList.append([float(num) for num in features])

# Hold out every third row (positions 0, 3, 6, ...) as the test set and train
# on the remaining two thirds.
indices = range(len(xList))
testPositions = [i for i in indices if i % 3 == 0]
trainPositions = [i for i in indices if i % 3 != 0]

xListTest = [xList[pos] for pos in testPositions]
xListTrain = [xList[pos] for pos in trainPositions]
labelsTest = [labels[pos] for pos in testPositions]
labelsTrain = [labels[pos] for pos in trainPositions]

# Convert the Python lists to numpy arrays for model fitting and prediction.
xTrain, yTrain = numpy.array(xListTrain), numpy.array(labelsTrain)
xTest, yTest = numpy.array(xListTest), numpy.array(labelsTest)

# Candidate ridge penalties: 0.1**0, 0.1**1, ..., 0.1**6.
alphaList = [0.1**exponent for exponent in range(7)]

# Fit one ridge model per penalty and record its error on the held-out rows.
rmsError = []
for alph in alphaList:
    wineRidgeModel = linear_model.Ridge(alpha=alph)
    wineRidgeModel.fit(xTrain, yTrain)
    residuals = yTest - wineRidgeModel.predict(xTest)
    # L2 norm of the residuals divided by sqrt(n) is the root-mean-square error.
    rmsError.append(numpy.linalg.norm(residuals, 2) / sqrt(len(yTest)))

print("RMS Error                  alpha")
for error, penalty in zip(rmsError, alphaList):
    print(error, penalty)

# The x axis is the position in alphaList, i.e. the exponent of 0.1, which is
# why it is labelled -log(alpha).
x = range(len(rmsError))
plt.plot(x, rmsError, 'k')
plt.xlabel('-log(alpha)')
plt.ylabel('Error (RMS)')
plt.show()

# Position of the smallest RMS error (the first one, if there are ties).
indexBest = min(range(len(rmsError)), key=rmsError.__getitem__)
alph = alphaList[indexBest]

# Refit with the best penalty and look at the errors on the held-out rows.
wineRidgeModel = linear_model.Ridge(alpha=alph)
wineRidgeModel.fit(xTrain, yTrain)
testPredictions = wineRidgeModel.predict(xTest)
errorVector = yTest - testPredictions

# Histogram of the prediction errors.
plt.hist(errorVector)
plt.xlabel("Bin Boundaries")
plt.ylabel("Counts")
plt.show()

# Predicted vs. actual scores; large, mostly transparent markers make
# overlapping points show up as darker spots.
plt.scatter(testPredictions, yTest, s=100, alpha=0.10)
plt.xlabel('Predicted Taste Score')
plt.ylabel('Actual Taste Score')
plt.show()

运行结果如下：

RMS Error                  alpha
0.659578817634 1.0
0.657861091881 0.1
0.657617214464 0.010000000000000002
0.657521648264 0.0010000000000000002
0.657419068011 0.00010000000000000002
0.657394162885 1.0000000000000003e-05
0.657391308716 1.0000000000000004e-06

llx1026

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python机器学习3-5代码

import urllib.requestimport numpyfrom sklearn import datasets, linear_modelfrom math import sqrtimport matplotlib.pyplot as plttarget_url = ("http://archive.ics.uci.edu/ml/machine-learning-datab
复制链接

扫一扫

专栏目录