"""Machine Learning qsc1

Logistic regression for a binary classification task, trained with
stochastic gradient descent.
"""

import numpy as np
import matplotlib.pyplot as plt
import random
import time

def split_data(onelinedata):
    """Parse one comma-separated text line into a list of floats."""
    return [float(field) for field in onelinedata.split(",")]
def make_train_data(path, chunk_size):
    """Load training samples from a comma-separated text file.

    Reads at most roughly ``chunk_size`` bytes worth of whole lines
    (``readlines`` size hint) and returns one row per line as a
    float16 array.
    """
    with open(path) as f:
        parsed = [[float(tok) for tok in line.split(",")]
                  for line in f.readlines(chunk_size)]
    return np.array(parsed, dtype=np.float16)
def make_test_data(path, chunk_size):
    """Load test samples from a comma-separated text file.

    ``chunk_size`` caps (approximately) how many bytes of complete
    lines are read; each line becomes one float16 row.
    """
    with open(path) as handle:
        raw_lines = handle.readlines(chunk_size)
    rows = [list(map(float, line.split(","))) for line in raw_lines]
    return np.array(rows, dtype=np.float16)

# --- configuration: data file locations and read-size limits -----------------
#begin_time=time.time()
train_path = "./train_data.txt"
test_path1 = "./test_data.txt"
test_path2 = "./answer.txt"
train_chunk_size = 10000  # MB; converted to bytes at the call sites below
test_chunk_size = 100  # MB; converted to bytes at the call sites below

# Training file layout: features in every column but the last,
# the label in the last column.
train_xy = make_train_data(train_path, chunk_size=train_chunk_size*1024*1024)
train_x = train_xy[:, :-1] #input features
train_y = train_xy[:,-1] #output labels
# test inputs and their expected answers live in two separate files
test_x=make_test_data(test_path1,chunk_size=test_chunk_size*1024*1024)
test_y=make_test_data(test_path2,chunk_size=test_chunk_size*1024*1024)
#print(train_x.shape)
#print(train_y.shape)
#print(train_xy.shape)
#print(test_x.shape)
#print(test_y.shape)

def sigmoid(inx):
    """Numerically stable logistic function for a scalar-like input.

    Branching on the sign of the argument keeps np.exp's argument
    non-positive, avoiding overflow for large-magnitude inputs.
    """
    if inx < 0:
        z = np.exp(inx)
        return z / (1 + z)
    return 1.0 / (1 + np.exp(-inx))

def gradAscnet(input,output,test_x,test_y):
    """Train logistic-regression weights by stochastic gradient ascent.

    input:   (m, n) feature array, one sample per row.
    output:  length-m label vector (presumably 0/1 labels -- TODO confirm).
    test_x, test_y: not used by the training loop itself; kept so existing
        callers (and an optional per-pass accuracy printout) keep working.

    Returns an (n, 1) weight column usable by predict().
    """
    dataMatrix=np.mat(input)
    labelMatrix=np.mat(output)  # np.mat of a 1-D vector -> (1, m) row matrix
    m,n=input.shape
    maxCycles=100  # full passes over the training set
    weights=np.ones((n,1))
    for j in range(maxCycles):
        dataIndex=list(range(m))  # rows not yet visited in this pass
        errorsum = np.zeros((1,1))  # accumulated error, handy when debugging
        for i in range(m):
            # Learning rate decays with pass and iteration count but keeps
            # a 0.01 floor so late samples still contribute.
            alpha=4/(1.0+j+i)+0.01
            # Sample without replacement: draw a POSITION in the list of
            # unvisited rows, then map it to the actual row index.
            # (The original used the list position directly as the row
            # index, which biased the updates toward low-numbered rows
            # and defeated the del() bookkeeping below.)
            randIndex=int(random.uniform(0,len(dataIndex)))
            rowIndex=dataIndex[randIndex]

            h=sigmoid(sum(dataMatrix[rowIndex,:]*weights))  # (1,1) score
            error=labelMatrix[0,rowIndex]-h
            # Single-sample gradient-ascent step.
            weights=weights+alpha*error[0,0]*(dataMatrix[rowIndex,:].transpose())
            errorsum=errorsum+error
            del(dataIndex[randIndex])
    return weights


def predict(input,w):
    """Score each row of *input* with weights *w* through the sigmoid.

    Returns a column matrix of probabilities, one per input row.
    """
    probs = np.mat(input) * np.mat(w)
    for row in range(probs.size):
        probs[row, 0] = sigmoid(probs[row, 0])
    return probs

# Train on the full training set, then score the held-out test inputs.
weights=gradAscnet(train_x,train_y,test_x,test_y)
pred_y=predict(test_x,weights)

# Threshold the sigmoid probabilities at 0.5 to get hard 0/1 labels.
pred_y=np.round(pred_y)
# Fraction of test labels predicted exactly (assumes 0/1 labels in test_y).
print((pred_y.size-sum(abs(pred_y-test_y)))/pred_y.size) #accuracy rate

#end_time=time.time()
#print(end_time-begin_time)  #second

# One integer label per line.
np.savetxt("./result.txt",pred_y,fmt='%d',delimiter='\n')



 
