python机器学习4-6代码及运行结果

import urllib.request
from pylab import *
from math import sqrt
import matplotlib.pyplot as plot

target_url = ("http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data")
data = urllib.request.urlopen(target_url)

xList = []
labels = []

for line in data:
    row = line.strip().split(",".encode(encoding='utf-8'))
    labels.append(float(row.pop()))
    xList.append(row)

names = ['Sex', 'Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight', 'Viscera weight', 'Shell weight', 'Rings']

#把数据中的性别用数字表示,然后加到属性列表中
xCoded = []
for row in xList:
    codedSex = [0.0, 0.0]
    if row[0] == b'M': codedSex[0] = 1.0
    if row[0] == b'F': codedSex[1] = 1.0

    numRow = [float(row[i]) for i in range(1, len(row))]
    rowCoded = list(codedSex) + numRow
    xCoded.append(rowCoded)

namesCoded = ['Sex1', 'Sex2', 'Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight', 'Viscera weight', 'Shell weight', 'Rings']
nrows = len(xCoded)
ncols = len(xCoded[1])

xMeans = []
xSD = []
for i in range(ncols):
    col = [xCoded[j][i] for j in range(nrows)]
    mean = sum(col) / nrows
    xMeans.append(mean)
    colDiff = [(xCoded[j][i] - mean) for j in range(nrows)]
    sumSq = sum([colDiff[i] * colDiff[i] for i in range(nrows)])
    stdDev = sqrt(sumSq / nrows)
    xSD.append(stdDev)

xNormalized = []
for i in range(nrows):
    rowNormalized = [(xCoded[i][j] - xMeans[j])/xSD[j] for j in range(ncols)]
    xNormalized.append(rowNormalized)
    
meanLabel = sum(labels) / nrows
sdLabel = sqrt(sum([(labels[i] - meanLabel) * (labels[i] - meanLabel) for i in range(nrows)]) / nrows)
labelNormalized = [(labels[i] - meanLabel) / sdLabel for i in range(nrows)]

beta = [0.0] * ncols

betaMat = []
betaMat.append(list(beta))

nSteps = 350
stepSize = 0.004
nzList = []

for i in range(nSteps):
    residuals = [0.0] * nrows
    for j in range(nrows):
        labelsHat = sum([xNormalized[j][k] * beta[k] for k in range(ncols)])
        residuals[j] = labelNormalized[j] - labelsHat
        
    corr = [0.0] * ncols
    for j in range(ncols):
        corr[j] = sum([xNormalized[k][j] * residuals[k] for k in range(nrows)]) / nrows

    iStar = 0
    corrStar = corr[0]

    for j in range(1, (ncols)):
        if abs(corrStar) < abs(corr[j]):
            iStar = j
            corrStar = corr[j]

    beta[iStar] += stepSize * corrStar / abs(corrStar)
    betaMat.append(list(beta))

    nzBeta = [index for index in range(ncols) if beta[index] !=0]
    for q in nzBeta:
        if(q in nzList) == False:
            nzList.append(q)
print("nzList = ", nzList)
nameList = [namesCoded[nzList[i]] for i in range(len(nzList))]
print(nameList)
for i in range(ncols):
    coefCurve = [betaMat[k][i] for k in range(nSteps)]
    xaxis = range(nSteps)
    plot.plot(xaxis, coefCurve)
plot.xlabel("Steps Taken")
plot.ylabel(("Coefficient Values"))
plot.show()

运行结果:

nzList =  [8, 4, 1, 6, 3, 0]
['Shell weight', 'Height', 'Sex2', 'Shucked weight', 'Diameter', 'Sex1']
运行效果图:



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值