Python 机器学习 4-4 代码及运行结果

import urllib.request
import sys
from math import sqrt
import matplotlib.pyplot as plot

# Download the sonar data file from the UCI repository and split every
# line into its comma-separated fields.
target_url = ("http://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data")
xList = []

# The context manager closes the HTTP response once all lines are read,
# instead of leaving the connection open for the rest of the script.
with urllib.request.urlopen(target_url) as data:
    for line in data:
        line = line.strip()
        if not line:
            # A blank line would otherwise become a row holding one empty
            # field and end up as a sample with no attributes.
            continue
        # The response yields bytes, so the fields stay bytes; the label
        # check further down compares against b'M'.
        xList.append(line.split(b","))

# Separate every parsed row into its numeric attributes (xNum) and a
# numeric class label (labels).
xNum = []
labels = []

for fields in xList:
    # pop() removes the last column from the row and returns it.
    tag = fields.pop()
    # b'M' is encoded as 1.0; any other label value as 0.0.
    labels.append(1.0 if tag == b'M' else 0.0)
    # Whatever remains in the row is converted to float.
    xNum.append(list(map(float, fields)))

nrow = len(xNum)
# Read the column count from the first row so a single-row data set does
# not fail with an IndexError.
ncol = len(xNum[0])

# Mean and standard deviation (dividing by nrow) of every attribute column.
xMeans = []
xSD = []
for i in range(ncol):
    col = [xNum[j][i] for j in range(nrow)]
    mean = sum(col) / nrow
    xMeans.append(mean)
    # Reuse the extracted column and give the inner loop its own name
    # rather than re-using the outer loop index.
    colDiff = [value - mean for value in col]
    sumSq = sum(diff * diff for diff in colDiff)
    stdDev = sqrt(sumSq / nrow)
    xSD.append(stdDev)

# Standardise every attribute with the column means and standard
# deviations computed above: (value - mean) / standard deviation.
xNormalized = []
for sample in xNum:
    xNormalized.append(
        [(value - mu) / sigma for value, mu, sigma in zip(sample, xMeans, xSD)]
    )

# Standardise the class labels the same way.
meanLabel = sum(labels) / nrow
squaredDev = [(label - meanLabel) * (label - meanLabel) for label in labels]
sdLabel = sqrt(sum(squaredDev) / nrow)
labelsNormalized = [(label - meanLabel) / sdLabel for label in labels]

# Forward stagewise regression: start from all-zero coefficients and, at
# every step, nudge the coefficient of the attribute that is most
# correlated with the current residuals by a fixed step size.
beta = [0.0] * ncol
# betaMat records a copy of the coefficient vector before the first step
# and after every step.
betaMat = [list(beta)]

nSteps = 350
stepSize = 0.004
# Attribute indices in the order their coefficient first became non-zero.
nzList = []
for i in range(nSteps):
    # residual = y - (x0 * beta0 + x1 * beta1 + ...) for every sample.
    residuals = [
        target - sum(x * b for x, b in zip(sample, beta))
        for sample, target in zip(xNormalized, labelsNormalized)
    ]

    # For each attribute column: sum of attribute * residual over all
    # samples, divided by the number of samples.
    corr = [
        sum(xNormalized[k][j] * residuals[k] for k in range(nrow)) / nrow
        for j in range(ncol)
    ]

    # Attribute with the largest absolute correlation; max() keeps the
    # earliest index on ties.
    iStar = max(range(ncol), key=lambda j: abs(corr[j]))
    corrStar = corr[iStar]

    # Move one step in the direction of the correlation's sign.  When the
    # correlation is exactly zero there is no direction to move in, and
    # dividing by abs(corrStar) would raise ZeroDivisionError, so the
    # coefficients are left unchanged for this step.
    if corrStar != 0.0:
        beta[iStar] += stepSize * corrStar / abs(corrStar)
    betaMat.append(list(beta))

    # Only beta[iStar] can have changed, so it is the only index that may
    # need to be added to the list of non-zero coefficients.
    if beta[iStar] != 0.0 and iStar not in nzList:
        nzList.append(iStar)

# Report the attributes in the order their coefficients became non-zero.
names = ['V' + str(i) for i in range(ncol)]
nameList = [names[index] for index in nzList]
print(nameList)

# betaMat holds the starting coefficients plus one row per step, so plot
# every recorded row; stopping at nSteps would drop the final step.
xaxis = range(len(betaMat))
for i in range(ncol):
    # One curve per attribute: its coefficient value after each step.
    coefCurve = [stepBeta[i] for stepBeta in betaMat]
    plot.plot(xaxis, coefCurve)

plot.xlabel("Steps Taken")
plot.ylabel(("Coefficient Values"))
plot.show()

输出结果:

['V10', 'V48', 'V44', 'V11', 'V35', 'V51', 'V20', 'V3', 'V21', 'V15', 'V43', 'V0', 'V22', 'V45', 'V53', 'V27', 'V30', 'V50', 'V58', 'V46', 'V56', 'V28', 'V39']


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值