import numpy as np
import csv
def sig(x):
    """Apply the logistic sigmoid element-wise.

    Args:
        x: Scalar, ndarray or matrix of scores (typically
            ``feature_data * w``).

    Returns:
        numpy.matrix: ``1 / (1 + exp(-x))`` for every element of ``x``.
    """
    # np.asmatrix is the function the old ``np.mat`` alias pointed to; the
    # alias was removed in NumPy 2.0, so call the real name directly.
    return np.asmatrix(1 / (1 + np.exp(-x)))
def lr_train_bgd(feature_data, label_data, maxCycle, alpha):
    """Fit logistic-regression weights with Newton's method.

    Each iteration computes the gradient and the Hessian of the negative
    log-likelihood and applies ``w <- w - H^{-1} * gradient``.

    Args:
        feature_data: (n_samples, n_features) matrix/array of features. A
            constant column must already be present if a bias is wanted.
        label_data: (n_samples, 1) matrix/array of 0/1 labels.
        maxCycle: Maximum number of Newton iterations.
        alpha: Learning rate. Unused (a Newton step needs no step size);
            kept so existing callers keep working.

    Returns:
        numpy.matrix: (n_features, 1) weight column vector.
    """
    X = np.asarray(feature_data, dtype=float)
    y = np.asarray(label_data, dtype=float).reshape(-1, 1)

    # Start from zeros so any number of feature columns is supported.
    weights = np.zeros((X.shape[1], 1))

    iteration = 0
    while iteration < maxCycle:
        # tanh form of the sigmoid: identical value, but never overflows.
        prob = 0.5 * (1.0 + np.tanh(0.5 * X.dot(weights)))

        gradient = X.T.dot(prob - y)
        # Hessian = sum_i p_i * (1 - p_i) * x_i^T x_i  -> (n_features, n_features)
        hessian = X.T.dot(X * (prob * (1.0 - prob)))

        try:
            step = np.linalg.solve(hessian, gradient)
        except np.linalg.LinAlgError:
            # Singular Hessian (e.g. collinear columns): use the
            # minimum-norm step instead of crashing.
            step = np.linalg.pinv(hessian).dot(gradient)

        weights = weights - step
        print(np.asmatrix(weights))
        iteration = iteration + 1

        # Converged: further Newton steps would not change the weights.
        if np.linalg.norm(step) < 1e-10:
            break

    return np.asmatrix(weights)
def model(w, feature_data):
    """Predict hard 0/1 class labels for every sample.

    Args:
        w: (n_features, 1) weight column vector.
        feature_data: (n_samples, n_features) feature matrix/array.

    Returns:
        numpy.matrix: (n_samples, 1) matrix of labels, 1.0 where the
        predicted probability is at least 0.5 and 0.0 otherwise.
    """
    # np.dot is a true matrix product for both np.matrix and ndarray inputs.
    scores = np.dot(feature_data, w)
    # sigmoid(z) >= 0.5 exactly when z >= 0, so the sign of the score decides
    # the class; a tie (probability 0.5) is assigned to class 1 so that every
    # sample receives a label.
    return np.asmatrix(np.asarray(scores) >= 0, dtype=float)
def error_rate(h, label_data):
    """Compute the fraction of misclassified samples.

    Args:
        h: (n_samples, 1) predicted labels.
        label_data: (n_samples, 1) true labels.

    Returns:
        tuple: ``(error, numbel)`` where ``error`` is the share of samples
        whose prediction differs from the label and ``numbel`` is the number
        of samples. ``(0.0, 0)`` is returned when there are no samples.

    Raises:
        ValueError: If ``h`` and ``label_data`` hold different numbers of
            samples.
    """
    predicted = np.asarray(h).reshape(-1)
    actual = np.asarray(label_data).reshape(-1)
    numbel = actual.shape[0]

    if numbel == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0, 0
    if predicted.shape[0] != numbel:
        raise ValueError(
            "h has %d entries but label_data has %d"
            % (predicted.shape[0], numbel)
        )

    # Count mismatches (not matches): the result is an error rate.
    wrong = int(np.count_nonzero(predicted != actual))
    error = wrong / numbel
    return error, numbel
def load_data(file_name):
    """Load a comma-separated data file into feature and label matrices.

    The file is expected to have one header row, an index column first and
    the label column last; everything in between is treated as features.

    Args:
        file_name: Path of the CSV file.

    Returns:
        tuple: ``(feature_data, label_data)`` as numpy matrices.
        ``feature_data`` is (n_samples, n_features + 1) with a trailing
        column of ones for the bias term; ``label_data`` is (n_samples, 1).

    Raises:
        ValueError: If the file has fewer than two columns.
    """
    # utf-8-sig strips a leading byte-order mark if the file has one.
    with open(file_name, encoding='utf-8-sig') as f:
        # ndmin=2 keeps a single data row two-dimensional.
        raw = np.loadtxt(f, delimiter=',', skiprows=1, ndmin=2)
    print(raw)

    if raw.shape[1] < 2:
        raise ValueError("expected an index column and a label column")

    features = raw[:, 1:-1]   # drop the index column and the label column
    labels = raw[:, -1:]      # keep the labels as a column vector
    bias = np.ones((features.shape[0], 1))

    # np.asmatrix replaces the np.mat alias that NumPy 2.0 removed.
    feature_data = np.asmatrix(np.hstack((features, bias)))
    label_data = np.asmatrix(labels)
    return feature_data, label_data
if __name__ == "__main__":
    # Forward slashes work on every platform; the previous back-slash
    # literal relied on the invalid escape sequences "\d" and "\w".
    data_path = "../data/watermelon_3a.csv"

    # 1. Load the training data.
    print("-----1.load data-----")
    feature_data, label_data = load_data(data_path)
    # The evaluation set is read from the same file as the training set.
    text_data, text_label_data = load_data(data_path)
    print(feature_data)

    # 2. Train the logistic-regression model.
    print("-----2.training------")
    w = lr_train_bgd(feature_data, label_data, 3000, 0)

    # 3. Predict labels with the trained weights.
    print("------3.model---------")
    h = model(w, text_data)

    # 4. Report the result of error_rate, then labels, predictions, weights.
    print("-------4.error value-----")
    error, numbe = error_rate(h, text_label_data)
    print(error)
    print(text_label_data)
    print(h)
    print(w)
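# Logistic (log-odds) regression functions; the original post title notes that the code contains errors.
# Web-page residue: "latest recommended article published 2023-07-30 15:26:19".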