note - logistic regression theory & code

code - numpy

# -*- coding: utf-8 -*-
"""
Created on Tue Dec 09 21:54:00 2014

@author: wepon

Program description:

loadData
Reads every file in a directory and returns the data as a matrix.
For example, loadData('train') reads all the txt files ('0_0.txt' through
'1_150.txt'), flattens the 32*32 digits in each file into a 1*1024 row,
and returns an m*1024 matrix plus the label (0 or 1) parsed from each filename.

sigmoid
Implements the sigmoid function.

gradAscent
Computes the regression coefficients by gradient ascent.

classfy
Predicts the class of each input sample from the regression coefficients.

"""
# https://blog.csdn.net/u012162613/article/details/41844495
from numpy import *
from os import listdir


def loadData(direction):
    trainfileList=listdir(direction)  # list the names of all files in the directory
    m=len(trainfileList)              # the training set has m files
    dataArray= zeros((m,1024))
    labelArray= zeros((m,1))
    for i in range(m):
        returnArray=zeros((1,1024))   # feature vector built from one txt file
        filename=trainfileList[i]
        fr=open('%s/%s' %(direction,filename))
        for j in range(32):
            lineStr=fr.readline()
            for k in range(32):
                returnArray[0,32*j+k]=int(lineStr[k])
        fr.close()                    # close the file handle after reading
        dataArray[i,:]=returnArray    # store the feature vector

        filename0=filename.split('.')[0]
        label=filename0.split('_')[0]
        labelArray[i]=int(label)      # store the class label (parsed from the filename)
    return dataArray,labelArray
    
def sigmoid(inX):
    return 1.0/(1+exp(-inX))
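
A side note (an observation, not from the original post): for large negative inputs, exp(-inX) can overflow float64 and NumPy will emit a RuntimeWarning. A minimal sketch of a numerically safer variant; sigmoid_stable is a hypothetical name:

def sigmoid_stable(inX):
    # sigmoid saturates to 0/1 long before |inX| reaches 500, so clipping
    # the input keeps exp() inside the float64 range without changing results
    return 1.0/(1+exp(-clip(inX,-500,500)))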

#alpha: step size; maxCycles: number of iterations; both can be tuned
def gradAscent(dataArray,labelArray,alpha,maxCycles):
    dataMat=mat(dataArray)    #size:m*n
    labelMat=mat(labelArray)      #size:m*1
    m,n=shape(dataMat)
    weigh=ones((n,1)) 
    for i in range(maxCycles):
        h=sigmoid(dataMat*weigh)
        error=labelMat-h    #size:m*1
        weigh=weigh+alpha*dataMat.transpose()*error
    return weigh
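
For reference (this derivation is not spelled out in the original post, but it is what the update above implements): the loop performs gradient ascent on the log-likelihood of the logistic model,

    \ell(w) = \sum_i [ y_i \log h_i + (1 - y_i) \log(1 - h_i) ],  where h = \sigma(Xw)

whose gradient is \nabla \ell(w) = X^\top (y - h). One step is therefore w \leftarrow w + \alpha X^\top (y - h), which is exactly weigh + alpha*dataMat.transpose()*error: ascent (adding the gradient), not descent.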

def classfy(testdir,weigh):
    dataArray,labelArray=loadData(testdir)
    dataMat=mat(dataArray)
    labelMat=mat(labelArray)
    h=sigmoid(dataMat*weigh)  #size:m*1
    m=len(h)
    error=0.0
    for i in range(m):
        if h[i]>0.5:          # int(h[i]) would truncate every probability in (0,1) to 0
            print (int(labelMat[i]),'is classified as: 1')
            if int(labelMat[i])!=1:
                error+=1
                print ('error')
        else:
            print (int(labelMat[i]),'is classified as: 0')
            if int(labelMat[i])!=0:
                error+=1
                print ('error')
    print ('error rate is:','%.4f' %(error/m))
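
A vectorized alternative (a sketch under the same data layout; classfy_vec is a hypothetical name, not in the original):

def classfy_vec(testdir,weigh):
    dataArray,labelArray=loadData(testdir)
    h=sigmoid(mat(dataArray)*weigh)                    # m*1 predicted probabilities
    preds=(h>0.5).astype(int)                          # threshold at 0.5
    errorRate=mean(array(preds)!=labelArray.astype(int))
    print ('error rate is:','%.4f' %errorRate)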
                
def digitRecognition(trainDir,testDir,alpha=0.07,maxCycles=10):
    data,label=loadData(trainDir)
    weigh=gradAscent(data,label,alpha,maxCycles)
    classfy(testDir,weigh)
    

# set these to the train/test directories of 32*32 txt digits before running
file_A = ''   # e.g. the 'train' directory
file_B = ''   # e.g. the 'test' directory
digitRecognition(file_A, file_B, alpha=0.07, maxCycles=10)

        

code - tf

import tensorflow as tf

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('mnist_path_here', one_hot=True)

# Parameters
learning_rate = 0.01
training_epochs = 25
batch_size = 100
display_step = 1

# tf Graph Input
x = tf.placeholder(tf.float32, [None, 784]) # mnist data image of shape 28*28=784
y = tf.placeholder(tf.float32, [None, 10]) # 0-9 digits recognition => 10 classes

# Set model weights
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

# Construct model
pred = tf.nn.softmax(tf.matmul(x, W) + b) # Softmax

# Minimize error using cross entropy
cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred), reduction_indices=1))
# Gradient Descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

# Start training
with tf.Session() as sess:
    sess.run(init)

    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples/batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Fit training using batch data
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs, y: batch_ys})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if (epoch+1) % display_step == 0:
            print ("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))

    print ("Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy for 3000 examples
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print ("Accuracy:", accuracy.eval({x: mnist.test.images[:3000], y: mnist.test.labels[:3000]}))

 
