import urllib.request
import numpy
from sklearn import datasets, linear_model
from sklearn.metrics import roc_curve, auc
import pylab as plt
target_url = ("http://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data")
data = urllib.request.urlopen(target_url)
xList = []
labels = []
for line in data:
row = line.strip().split(",".encode(encoding='utf-8'))
if(row[-1] == b'M'):
labels.append(1.0)
else:
labels.append(0.0)
row.pop()
floatRow = [float(num) for num in row]
xList.append(floatRow)
indices = range(len(xList))
xListTest = [xList[i] for i in indices if i%3 == 0]
xListTrain = [xList[i] for i in indices if i%3 != 0]
labelsTest = [labels[i] for i in indices if i%3 == 0]
labelsTrain = [labels[i] for i in indices if i%3 != 0]
xTrain = numpy.array(xListTrain)
yTrain = numpy.array(labelsTrain)
xTest = numpy.array(xListTest)
yTest = numpy.array(labelsTest)
print(xTrain.shape);print(yTrain.shape);print(xTest.shape);print(yTest.shape)
alphaList = [0.1**i for i in [-3, -2, -1, 0, 1, 2, 3, 4, 5]]
aucList = []
for alph in alphaList:
rocksVMinesRidgeModel = linear_model.Ridge(alpha=alph)
rocksVMinesRidgeModel.fit(xTrain, yTrain)
fpr, tpr, thresholds = roc_curve(yTest, rocksVMinesRidgeModel.predict(xTest))
roc_auc = auc(fpr, tpr)
aucList.append(roc_auc)
print("AUC alpha")
for i in range(len(aucList)):
print(aucList[i], alphaList[i])
x = [-3, -2, -1, 0, 1, 2, 3, 4, 5]
plt.plot(x, aucList)
plt.xlabel('-log(alpah)')
plt.ylabel('AUC')
plt.show()
indexBest = aucList.index(max(aucList))
alph = alphaList[indexBest]
rocksVMinesRidgeModel = linear_model.Ridge(alpha=alph)
rocksVMinesRidgeModel.fit(xTrain, yTrain)
plt.scatter(rocksVMinesRidgeModel.predict(xTest), yTest, s=100, alpha=0.25)
plt.xlabel("Predicted Value")
plt.ylabel("Actual Value")
plt.show()
运行结果:
AUC alpha
0.841113841114 999.9999999999999
0.864045864046 99.99999999999999
0.907452907453 10.0
0.9180999181 1.0
0.882882882883 0.1
0.861588861589 0.010000000000000002
0.851760851761 0.0010000000000000002
0.850941850942 0.00010000000000000002
0.849303849304 1.0000000000000003e-05