逻辑回归之考试是否录取

一、其中 LogisticRegression 是自己写的模块,该模块沿用了《波士顿房价预测》一文中的代码。

二、完整代码

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from LogisticRegression import gradDescent,cost_function,accuracy,feature_scalling

def load_data(path='./data/LogiReg_data.txt'):
    """Load the exam-score data set and return it in a random row order.

    The file is read as header-less CSV with three columns, labelled
    ``exam1``, ``exam2`` and ``label``.

    Args:
        path: Location of the CSV file. Defaults to the path this script
            originally hard-coded.

    Returns:
        A tuple ``(X, y)``. ``X`` holds every column except the last and
        ``y`` holds the last column as a 2-D column array. Both are
        permuted with the same random row order, so sample/label pairs
        stay aligned.
    """
    # names= supplies column labels because the file has no header row.
    # .values yields a plain NumPy array without row/column labels;
    # DataFrame.as_matrix() was removed in pandas 1.0 and fails there.
    data = pd.read_csv(path, names=['exam1', 'exam2', 'label']).values
    X = data[:, :-1]  # every column but the last: the two exam scores
    y = data[:, -1:]  # last column, sliced so it stays 2-D
    print(X)
    print(X.shape)  # (100, 2) for the referenced data set, per the original author's note
    print(X.shape[0])  # number of samples
    # Random permutation of the ROW (sample) indices 0 .. m-1.
    shuffle_index = np.random.permutation(X.shape[0])
    print(shuffle_index)
    X = X[shuffle_index]
    print(X)
    y = y[shuffle_index]
    return X, y  # shuffled features and their matching shuffled labels


def visualize_data(X, y):
    """Scatter-plot the first two feature columns, coloured by label.

    Args:
        X: 2-D array; column 0 is drawn on the x axis and column 1 on
            the y axis.
        y: Array of labels compared against 1 and 0 to pick the rows of
            each class.
    """
    # (label value, marker colour, legend text) for each class, drawn in
    # the same order as before: positives first, then negatives.
    classes = (
        (1, 'b', 'Admitted'),
        (0, 'r', 'Not Admitted'),
    )
    for label_value, colour, legend_text in classes:
        # First element of np.where's result holds the row indices.
        rows = np.where(y == label_value)[0]
        plt.scatter(X[rows, 0], X[rows, 1], s=30, c=colour, marker='o', label=legend_text)
    plt.legend()  # show the 'Admitted' / 'Not Admitted' key
    plt.show()

def visualize_cost(ite, cost):
    """Plot the cost-function history against the iteration index.

    Args:
        ite: Number of iterations that were run. ``cost`` must contain
            exactly this many values, because the x and y sequences
            passed to ``plt.plot`` need equal length.
        cost: Sequence of cost values, one per iteration.
    """
    # Integer iteration indices 0 .. ite-1. The earlier
    # np.linspace(0, ite, ite) produced ite points spaced ite/(ite-1)
    # apart, so the x values drifted away from the true iteration numbers.
    iterations = np.arange(ite)
    plt.plot(iterations, cost, linewidth=1)
    plt.title('cost history', color='r')
    plt.xlabel('iterations')
    plt.ylabel('cost J')
    plt.show()


if __name__ == '__main__':
    # Script entry point: load and plot the data, train with gradient
    # descent, then report the cost history, accuracy and parameters.
    separator = "******************"

    # Step 1. Load the shuffled samples and labels.
    features, labels = load_data()

    # Step 2. Show the raw data before any scaling.
    visualize_data(features, labels)

    # Step 3. Scale the features and set up the training parameters.
    _, n_features = features.shape
    features = feature_scalling(features)
    learning_rate = 0.1
    weights = np.random.randn(n_features, 1)  # one random weight per feature
    bias = 0.1
    max_iterations = 10000

    # Step 4. Train.
    weights, bias, cost_history = gradDescent(
        features, labels, weights, bias, learning_rate, max_iterations
    )

    # Step 5. Report.
    print(separator)
    print(cost_history[:20])  # first 20 recorded cost values
    visualize_cost(max_iterations, cost_history)
    train_accuracy = accuracy(features, labels, weights, bias)
    print("accuracys is :         " + str(train_accuracy))
    print("W:", weights)
    print("b: ", bias)
    print(separator)

三、数据

https://github.com/TolicWang/MachineLearningWithMe/blob/master/Lecture_02/data/LogiReg_data.txt

四、用sklearn来实现

代码:

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from LogisticRegression import feature_scalling
from sklearn.linear_model import LogisticRegression

def load_data(path='./data/LogiReg_data.txt'):
    """Load the exam-score data set and return it in a random row order.

    The file is read as header-less CSV with three columns, labelled
    ``exam1``, ``exam2`` and ``label``.

    Args:
        path: Location of the CSV file. Defaults to the path this script
            originally hard-coded.

    Returns:
        A tuple ``(X, y)``. ``X`` holds every column except the last and
        ``y`` holds the last column as a 2-D column array. Both are
        permuted with the same random row order.
    """
    # DataFrame.as_matrix() was removed in pandas 1.0; .values gives the
    # same plain NumPy array and also works on older pandas releases.
    data = pd.read_csv(path, names=['exam1', 'exam2', 'label']).values
    X = data[:, :-1]  # every column but the last: the two exam scores
    y = data[:, -1:]  # last column, sliced so it stays 2-D
    # One shared permutation keeps each sample aligned with its label.
    shuffle_index = np.random.permutation(X.shape[0])
    X = X[shuffle_index]
    y = y[shuffle_index]
    return X, y


def visualize_cost(ite, cost):
    """Plot the cost-function history against the iteration index.

    Args:
        ite: Number of iterations that were run. ``cost`` must contain
            exactly this many values, because the x and y sequences
            passed to ``plt.plot`` need equal length.
        cost: Sequence of cost values, one per iteration.
    """
    # Integer iteration indices 0 .. ite-1; np.linspace(0, ite, ite)
    # spaced its points ite/(ite-1) apart, which is not a whole-number step.
    iterations = np.arange(ite)
    plt.plot(iterations, cost, linewidth=1)
    plt.title('cost history', color='r')
    plt.xlabel('iterations')
    plt.ylabel('cost J')
    plt.show()


if __name__ == '__main__':
    # Script entry point: fit scikit-learn's LogisticRegression on the
    # scaled exam scores and report training accuracy and parameters.
    X, y = load_data()
    X = feature_scalling(X)
    # load_data slices the label column as data[:, -1:], i.e. an (m, 1)
    # column. scikit-learn expects 1-D labels of shape (n_samples,) and
    # emits a DataConversionWarning otherwise, so flatten once here.
    y = y.ravel()
    lr = LogisticRegression()
    lr.fit(X, y)
    print("******************")
    print("accuracys is :", lr.score(X, y))  # mean accuracy on the training data
    print("W:{},b:{}".format(lr.coef_, lr.intercept_))
    print("******************")

 

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值