Machine Learning Homework 2 (Complete)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.optimize as opt
path='ex2data1.txt'
data=pd.read_csv(path,header=None,names=['Exam1','Exam2','result'])#load the data from ex2data1.txt
positive=data[data['result'].isin([1])]#select the rows whose result is 1
negative=data[data['result'].isin([0])]#select the rows whose result is 0
fig,ax=plt.subplots(figsize=(12,8))
ax.scatter(positive['Exam1'],positive['Exam2'],s=50,c='b',marker='o',label='accept')#s is the marker size, marker the marker style
ax.scatter(negative['Exam1'],negative['Exam2'],s=50,c='r',marker='x',label='unaccept')
ax.legend()
ax.set_xlabel('Exam 1 scores')
ax.set_ylabel('Exam 2 scores')#everything above just sets up an ordinary scatter plot of the two classes
plt.show()
def sigmoid(z):
    return 1/(1+np.exp(-z))
nums = np.arange(-10, 10, step=1)
fig, ax = plt.subplots(figsize=(12,8))
ax.plot(nums, sigmoid(nums), 'r')
plt.show()#since this is logistic regression applied to classification, the hypothesis h(x) changes accordingly to sigmoid(theta^T x)
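# A quick sanity check of the properties the cost function below relies on
# (my addition, not part of the original assignment): sigmoid(0) is exactly 0.5
# and the outputs stay strictly inside (0, 1), so sigmoid(X*theta.T) can be read
# as the probability h(x) = P(y=1 | x; theta).
assert sigmoid(0) == 0.5
assert 0 < sigmoid(-10) < sigmoid(10) < 1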
def cost(theta, X, y):
    theta=np.matrix(theta)
    X=np.matrix(X)
    y=np.matrix(y)
    first = np.multiply(-y,np.log(sigmoid(X*theta.T)))#np.multiply is element-wise; for matrix multiplication use np.dot (or * on np.matrix, as here)
    second = np.multiply((1-y),np.log(1-sigmoid(X*theta.T)))
    return np.sum(first-second)/len(X)
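# For reference, the same cost can be written with plain arrays and the @
# operator instead of np.matrix. A sketch of an equivalent formulation (the
# rest of this script keeps the np.matrix version above):
def cost_vectorized(theta, X, y):
    h = sigmoid(X @ theta)  # X is (m, n), theta is (n,), h is (m,)
    return -np.mean(y.flatten() * np.log(h) + (1 - y.flatten()) * np.log(1 - h))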
data.insert(0,'ones',1)#insert a column named 'ones' at position 0, filled with 1s (the intercept term)
cols=data.shape[1]#shape[1] is the number of columns
X=data.iloc[:,0:cols-1]#slice out the feature columns
y=data.iloc[:,cols-1:cols]
X=np.array(X.values)
y=np.array(y.values)
theta=np.zeros(3)#3 parameters: the intercept plus one per exam score
print(cost(theta,X,y))
def gradient(theta, X, y):
    theta=np.matrix(theta)
    X=np.matrix(X)
    y=np.matrix(y)
    parameters=int(theta.ravel().shape[1])
    grad=np.zeros(parameters)
    error=sigmoid(X*theta.T)-y
    for i in range(parameters):#this function does not run gradient descent itself; it computes a single gradient evaluation, and a dedicated optimizer below uses it to minimize the cost
        term=np.multiply(error,X[:,i])
        grad[i]=np.sum(term)/len(X)
    return grad
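# As the comment inside gradient() notes, it returns a single gradient
# evaluation. A hand-rolled batch gradient descent built on it would look
# roughly like this (a sketch; alpha and iters are arbitrary illustrative
# values, and the script instead hands the gradient to a SciPy optimizer below):
def gradient_descent(theta, X, y, alpha=0.001, iters=10000):
    theta = theta.copy()
    for _ in range(iters):
        theta = theta - alpha * gradient(theta, X, y)
    return theta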
print(gradient(theta, X, y))
result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradient, args=(X, y))#fmin_tnc minimizes the cost with a truncated Newton method:
#func is the cost function, x0 the initial parameters, fprime the gradient, args the data
print(result)
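# fmin_tnc is SciPy's legacy interface to this solver; newer versions expose the
# same truncated Newton method through opt.minimize. An equivalent call would be
# (a sketch, assuming a reasonably recent SciPy):
res = opt.minimize(fun=cost, x0=theta, args=(X, y), method='TNC', jac=gradient)
print(res.x)  # corresponds to result[0] above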
def predict(theta,X):
    probability=sigmoid(X*theta.T)
    return [1 if x>=0.5 else 0 for x in probability]
the_min=np.matrix(result[0])#extract the parameters found by the optimizer
prediction=predict(the_min,X)#the model's predictions on the training set
correct=[1 if (a==1 and b==1) or (a==0 and b==0) else 0 for (a,b) in zip(prediction,y)]
accuracy=sum(map(int,correct))/len(correct)*100#map applies its first argument to every element of the second; the fraction correct times 100 gives a percentage
print('accuracy={0}%'.format(accuracy))
#training-set accuracy
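# With the fitted parameters, the decision boundary of this linear model is the
# line where theta0 + theta1*x1 + theta2*x2 = 0. A sketch of how it could be
# drawn on top of the scatter plot (the 30-100 score range is an assumption
# based on the data, not something the original computes):
coef = np.asarray(the_min).flatten()
x1_line = np.linspace(30, 100, 100)
x2_line = -(coef[0] + coef[1] * x1_line) / coef[2]
fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(positive['Exam1'], positive['Exam2'], s=50, c='b', marker='o', label='accept')
ax.scatter(negative['Exam1'], negative['Exam2'], s=50, c='r', marker='x', label='unaccept')
ax.plot(x1_line, x2_line, 'g', label='decision boundary')
ax.legend()
plt.show()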
path='ex2data2.txt'
data2=pd.read_csv(path,header=None,names=['Test1','Test2','Accepted'])
print(data2.head())
positive=data2[data2['Accepted'].isin([1])]
negative=data2[data2['Accepted'].isin([0])]
fig,ax=plt.subplots(figsize=(12,8))
ax.scatter(positive['Test1'],positive['Test2'],s=50,c='b',marker='o',label='Accepted')
ax.scatter(negative['Test1'],negative['Test2'],s=50,c='r',marker='x',label='Rejected')
ax.legend()
ax.set_xlabel('Test1 score')
ax.set_ylabel('Test2 score')
plt.show()
degree=5
x1=data2['Test1']
x2=data2['Test2']
data2.insert(3,'ones',1)
#the boundary between the classes here is clearly nonlinear, so build a set of polynomial features of Test1 and Test2 to fit a curved decision boundary
for i in range(1,degree):
    for j in range(0,i):
        data2['F'+str(i)+str(j)]=np.power(x1,i-j)*np.power(x2,j)
data2.drop('Test1',axis=1,inplace=True)#remove the Test1 column; inplace=True modifies data2 itself rather than returning a copy
data2.drop('Test2',axis=1,inplace=True)
print(data2.head())
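# The nested loop above is the polynomial feature mapping. A reusable version
# (a sketch; map_feature is my own name for it, echoing the course handout):
def map_feature(x1, x2, degree):
    out = pd.DataFrame({'ones': np.ones(len(x1))})
    for i in range(1, degree):
        for j in range(0, i):
            out['F' + str(i) + str(j)] = np.power(x1, i - j) * np.power(x2, j)
    return out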
#to keep this flexible polynomial model from overfitting, regularize the cost function to penalize model complexity
def regularized_cost(theta,X,Y,punishrate):
    theta=np.matrix(theta)
    X=np.matrix(X)
    y=np.matrix(Y)
    first=np.multiply(-y,np.log(sigmoid(X*theta.T)))
    second=np.multiply((1-y),np.log(1-sigmoid(X*theta.T)))
    reg=(punishrate/(2*len(X)))*np.sum(np.power(theta[:,1:theta.shape[1]],2))#the penalty skips theta_0, the intercept
    return np.sum(first-second)/len(X)+reg
#compute the gradient as before, but this time with the regularization term added
#(note: the learningrate argument is really the regularization strength lambda)
def gradient(theta,X,Y,learningrate):
    theta=np.matrix(theta)
    X = np.matrix(X)
    Y = np.matrix(Y)
    parameters=int(theta.ravel().shape[1])
    grad=np.zeros(parameters)
    error=sigmoid(X*theta.T)-Y
    for i in range(parameters):
        term=np.multiply(error,X[:,i])
        if(i==0):
            grad[i]=np.sum(term)/len(X)#theta_0 is not regularized
        else:
            grad[i]=(np.sum(term)/len(X))+((learningrate/len(X))*theta[0,i])
    return grad
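# The same regularized gradient can be written without the per-parameter loop.
# A vectorized sketch using plain arrays (equivalent in value to the function above):
def gradient_vectorized(theta, X, Y, learningrate):
    theta = np.asarray(theta).flatten()
    error = sigmoid(X @ theta) - np.asarray(Y).flatten()
    grad = (X.T @ error) / len(X)
    grad[1:] = grad[1:] + (learningrate / len(X)) * theta[1:]  # theta_0 is not penalized
    return grad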
#initialize the variables for the steps that follow
cols=data2.shape[1]
X2=data2.iloc[:,1:cols]
y2=data2.iloc[:,0:1]
X2=np.array(X2.values)
y2=np.array(y2.values)
theta=np.zeros(11)#one parameter for the intercept column plus one per polynomial feature
learningRate=1#the regularization strength lambda
print(regularized_cost(theta, X2, y2, learningRate))
print(gradient(theta,X2,y2,learningRate))
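# The regularization strength changes how strongly large weights are penalized.
# A quick comparison sketch (the lambda values 0, 1, 100 are arbitrary choices
# for illustration, not part of the original assignment):
for lam in (0, 1, 100):
    res = opt.fmin_tnc(func=regularized_cost, x0=theta, fprime=gradient, args=(X2, y2, lam))
    print('lambda =', lam, 'fitted cost =', regularized_cost(res[0], X2, y2, lam))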
result2=opt.fmin_tnc(func=regularized_cost,x0=theta,fprime=gradient,args=(X2,y2,learningRate))
print(result2)
theta_min = np.matrix(result2[0])
predictions = predict(theta_min, X2)
correct = [1 if ((a == 1 and b == 1) or (a == 0 and b == 0)) else 0 for (a, b) in zip(predictions, y2)]
accuracy = sum(map(int, correct)) / len(correct) * 100#fraction of correct predictions times 100 gives a percentage
print('accuracy = {0}%'.format(accuracy))
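# As a cross-check (assuming scikit-learn is installed; not part of the original
# assignment), a library implementation of L2-regularized logistic regression on
# the same mapped features. C is the inverse of the penalty strength, and the
# redundant 'ones' column is harmless here because sklearn fits its own intercept.
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(C=1.0)
model.fit(X2, y2.ravel())
print('sklearn accuracy = {0}%'.format(100 * model.score(X2, y2.ravel())))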
