# python机器学习-预测分析核心算法3-2代码在python3下运行遇到的一些问题

import urllib.request

import numpy
import pylab as pl
from sklearn import datasets, linear_model
from sklearn.metrics import roc_curve, auc

def confusionMatrix(predicted, actual, threshold):
    """Count true/false positives and negatives for a thresholded predictor.

    Parameters:
        predicted: sequence of numeric scores, one per sample.
        actual: sequence of numeric labels; a value above 0.5 is treated as
            the positive class, anything else as the negative class.
        threshold: cut-off applied to ``predicted``.

    Returns:
        ``[tp, fn, fp, tn]`` as floats, or ``-1`` when ``predicted`` and
        ``actual`` differ in length.

    Note:
        A score exactly equal to ``threshold`` is never counted as correct:
        for a positive sample it is not ``> threshold`` (counted as fn), and
        for a negative sample it is not ``< threshold`` (counted as fp).
    """
    if len(predicted) != len(actual):
        return -1

    tp = 0.0  # true positives
    fp = 0.0  # false positives
    tn = 0.0  # true negatives
    fn = 0.0  # false negatives

    for predictedValue, actualValue in zip(predicted, actual):
        if actualValue > 0.5:
            # Actual positive: correct only when the score clears the threshold.
            if predictedValue > threshold:
                tp += 1.0
            else:
                fn += 1.0
        else:
            # Actual negative: correct only when the score is below the threshold.
            if predictedValue < threshold:
                tn += 1.0
            else:
                fp += 1.0

    return [tp, fn, fp, tn]

# The book gives this address with an https:// scheme; the author of this
# port reports that the https form would not open and that dropping the "s"
# fixed it, so plain http is used here.
target_url = ("http://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data")

xList = []   # one list of float attributes per data row
labels = []  # 1.0 for rows tagged b'M', 0.0 otherwise

# Use the response as a context manager so the connection is closed once
# every line has been read.
with urllib.request.urlopen(target_url) as data:
    for line in data:
        stripped = line.strip()
        if not stripped:
            # Skip blank lines; they would otherwise yield an empty feature row.
            continue

        # Under Python 3 the response yields bytes, so the separator must be
        # bytes too (splitting on the str "," raises TypeError).
        *attributes, tag = stripped.split(b",")

        # The book compares against the str 'M'. Because the fields are bytes
        # here, that comparison is never true and every label would be 0.0,
        # so the tag is compared against b'M' instead.
        if tag == b'M':
            labels.append(1.0)
        else:
            labels.append(0.0)

        floatRow = [float(num) for num in attributes]
        xList.append(floatRow)
#print(labels)       # debugging aid, can be ignored
#print(len(labels))  # debugging aid

# Hold out every third row (index 0, 3, 6, ...) as the test set and keep the
# remaining rows for training.
indices = range(len(xList))
isTestRow = [i % 3 == 0 for i in indices]

xListTest = [row for row, held in zip(xList, isTestRow) if held]
xListTrain = [row for row, held in zip(xList, isTestRow) if not held]
labelsTest = [label for label, held in zip(labels, isTestRow) if held]
labelsTrain = [label for label, held in zip(labels, isTestRow) if not held]

# Convert the Python lists to numpy arrays.
xTrain = numpy.array(xListTrain)
yTrain = numpy.array(labelsTrain)
xTest = numpy.array(xListTest)
yTest = numpy.array(labelsTest)
#print('yTrain = ', yTrain)
#print('yTest = ', yTest)

# Report the array shapes in the order train X, train y, test X, test y.
for arrayName, arrayValue in (
    ("xTrain", xTrain),
    ("yTrain", yTrain),
    ("xTest", xTest),
    ("yTest", yTest),
):
    print("Shape of %s array" % arrayName, arrayValue.shape)

# Fit a linear regression on the training rows, using the 0.0/1.0 labels as
# the regression target.
rocksVMinesModel = linear_model.LinearRegression()
rocksVMinesModel.fit(xTrain, yTrain)

# In-sample predictions.
trainingPredictions = rocksVMinesModel.predict(xTrain)
#print('trainingPredictions = ', trainingPredictions)
# The second slice, [-5:-1], stops before the final element, so it shows four values.
print("Some values predicted by model", trainingPredictions[0:5], trainingPredictions[-5:-1])

# Confusion matrix on the training data at a 0.5 cut-off.
confusionMatTrain = confusionMatrix(trainingPredictions, yTrain, 0.5)
tp, fn, fp, tn = confusionMatTrain

print("tp = " + str(tp) + "\tfn = " + str(fn) + "\n" + "fp = " + str(fp) + "\ttn = " + str(tn) + '\n')

# Out-of-sample predictions.
testPredictions = rocksVMinesModel.predict(xTest)

# Score the test set at the same 0.5 cut-off as the training set. The earlier
# threshold of 0 made the two matrices incomparable for 0.0/1.0 labels.
conMatTest = confusionMatrix(testPredictions, yTest, 0.5)
tp, fn, fp, tn = conMatTest
print("tp = " + str(tp) + "\tfn = " + str(fn) + "\n" + "fp = " + str(fp) + "\ttn = " + str(tn) + '\n')

# In-sample ROC: compare the training labels with the training predictions
# and report the area under the resulting curve.
fpr, tpr, thresholds = roc_curve(yTrain, trainingPredictions)
roc_auc = auc(fpr, tpr)
print('AUC for in-sample ROC curve: %f' % roc_auc)

# Plot the ROC curve.
unitRange = [0.0, 1.0]
curveLabel = 'ROC curve (area = %0.2f)' % roc_auc

pl.clf()
pl.plot(fpr, tpr, label=curveLabel)
pl.plot([0, 1], [0, 1], 'k-')  # diagonal from (0, 0) to (1, 1)
pl.xlim(unitRange)
pl.ylim(unitRange)
pl.xlabel('False Positive Rate')
pl.ylabel('True Positive Rate')
pl.title('In sample ROC rocks versus mines')
pl.legend(loc="lower right")
pl.show()

# Out-of-sample ROC: compare the test labels with the test predictions.
fpr, tpr, thresholds = roc_curve(yTest, testPredictions)
# Store the test AUC in roc_auc. The earlier misspelling `roc_ayc` left
# roc_auc holding the in-sample value, so the print and the plot label below
# reported the wrong number.
roc_auc = auc(fpr, tpr)
print('AUC for out-of-sample ROC curve: %f' % roc_auc)

# Plot the ROC curve.
pl.clf()
pl.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
pl.plot([0, 1], [0, 1], 'k-')  # diagonal from (0, 0) to (1, 1)
pl.xlim([0.0, 1.0])
pl.ylim([0.0, 1.0])
pl.xlabel('False Positive Rate')
pl.ylabel('True Positive Rate')
pl.title('Out-of-sample ROC rocks versus mines')
pl.legend(loc="lower right")
pl.show()


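#-------------------------------------------------------------------------------------
# Error raised under Python 3 by the book's original line
# `row = line.strip().split(",")`: the lines read from the URL are bytes, so
# the separator passed to split() must be bytes as well.
#
# Traceback (most recent call last):
#   File "D:\Python362\a_机器学习及实战\3_2.py", line 38, in <module>
#     row = line.strip().split(",")
# TypeError: a bytes-like object is required, not 'str'
#--------------------------------------------------------------------------------------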

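#----------------------------------------------------------------------------------------------
# Debug output (presumably of row[-1] for each row after the bytes split):
# the label field is a bytes object such as b'R', not the str 'R'.
#
# b'R'
# b'R'
# b'R'
# b'R'
# b'R'
# b'R'
# b'R'
# ...
#-------------------------------------------------------------------------------------------------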

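'''
Program output recorded from the script as originally run.

Note: this run scored the test confusion matrix at threshold 0 and stored the
test AUC in the misspelled name `roc_ayc`, so the second confusion matrix is
not comparable with the first and the out-of-sample AUC line repeats the
in-sample value.

Shape of xTrain array (138, 60)
Shape of yTrain array (138,)
Shape of xTest array (70, 60)
Shape of yTest array (70,)
Some values predicted by model [-0.10240253  0.42090698  0.38593034  0.36094537  0.31520494] [ 1.12242751  0.77626699  1.02016858  0.66338081]
tp = 68.0	fn = 6.0
fp = 7.0	tn = 57.0

tp = 36.0	fn = 1.0
fp = 19.0	tn = 14.0

AUC for in-sample ROC curve: 0.979519
AUC for out-of-sample ROC curve: 0.979519
>>>
'''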


