以下代码是本人在学习西瓜书时,花费两个礼拜根据原理原创编写的。若需转载,请先咨询本人,谢谢!
自我研究模拟
附上连续数值截图
数据截图:
运行截图:
Linear_config.py
"""
Filename: Linear_config
Author: kdd_zyx
Description: 机器学习 - 线性回归
Datas:kdd - 回归
Start: 2018.11.3
End: 2018.11.3
"""
import time as t
import numpy as np
start = 1 # index of the first data item (not referenced by the code shown here)
end = -1 # index of the last field of a training row; Lead_dataset reads the target from it
# str -> float
def flt(pro_data):
    """Return ``[1.0, float(x0), float(x1), ...]`` for the items of *pro_data*.

    The leading 1.0 is a constant column prepended to every row
    (presumably the intercept term of the regression -- confirm with caller).

    Raises:
        ValueError: if an item cannot be parsed by ``float``.
    """
    return [1.0, *map(float, pro_data)]
def writ(NUM, result, retries=3):
    """Append *result* to the file ``kdd<NUM>.txt`` in the working directory.

    Args:
        NUM: value formatted into the file name ``kdd{}.txt``.
        result: text to append.
        retries: maximum number of write attempts (at least one is made).

    An I/O failure is reported with ``print('Error:', ...)`` and the write is
    retried, but only up to *retries* times. The previous version retried by
    calling itself unconditionally, so a persistent failure (for example a
    missing directory) recursed until ``RecursionError``.
    """
    for _ in range(max(1, retries)):
        try:
            # The with-statement closes the file; no explicit close() needed.
            with open('kdd{}.txt'.format(NUM), 'a+', encoding='UTF-8') as f:
                f.write(result)
            return
        except OSError as e:
            print('Error:', e)
# Load the training set and the test set
def Lead_dataset(train_addr='train.txt', text_addr='text.txt', text_splt=','):
    """Read the training and test files and return their parsed rows.

    Args:
        train_addr: path of the training file. In each row, the fields before
            index ``end`` are features and the field at index ``end`` is the
            target value.
        text_addr: path of the test file; every field of a row is a feature.
        text_splt: field delimiter used in both files.

    Returns:
        ``(pro_dataset, end_dataset, txt_dataset)``: training feature rows
        (each converted by ``flt``), training targets as floats, and test
        feature rows (each converted by ``flt``).

    Raises:
        OSError: if a file cannot be opened or read.
        ValueError: if a field cannot be parsed as a float.

    The error is printed and then re-raised. The previous handler called
    ``Lead_dataset(text_addr, text_splt)`` with undefined names, which raised
    ``NameError`` and hid the real problem.
    """
    pro_dataset = []
    end_dataset = []
    txt_dataset = []
    try:
        # with-blocks make sure both files are closed, even on a parse error.
        with open(train_addr, 'r', encoding='UTF-8') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # A blank (e.g. trailing) line would make float('') fail.
                    continue
                data = line.split(text_splt)
                pro_dataset.append(flt(data[:end]))
                end_dataset.append(float(data[end]))
        with open(text_addr, 'r', encoding='UTF-8') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                txt_dataset.append(flt(line.split(text_splt)))
    except (OSError, ValueError) as e:
        print('Error:', e)
        raise
    return pro_dataset, end_dataset, txt_dataset
Linear.py
"""
Filename: Linear
Author: kdd_zyx
Description: 机器学习 - 线性回归
Datas:kdd - 回归
Start: 2018.11.3
End: 2018.11.3
"""
from Linear_config import *
# Matrix-based container for the training data
class dataset:
    """Hold the training data and the regression weights as NumPy matrices.

    Args:
        pro_dataset: sequence of feature rows (one list of floats per sample).
        end_dataset: sequence of target values, one per sample.

    Attributes are documented next to their assignments below.
    """

    def __init__(self, pro_dataset, end_dataset):
        # np.asmatrix is used instead of np.mat, which NumPy 2.0 removed.
        self.X = np.asmatrix(pro_dataset)  # feature matrix, one row per sample
        self.Y = np.asmatrix(end_dataset).T  # target column vector
        self.m = np.shape(self.Y)[0]  # number of samples (rows of Y)
        self.n = np.shape(self.X)[1]  # number of feature columns of X
        # One weight per feature column, so that X * W is defined. It was
        # previously sized by the number of samples, which only matched X
        # when the data happened to be square.
        self.W = np.asmatrix(np.zeros((self.n, 1)))
class Linear:
    """Least-squares linear regression solved in closed form.

    Args:
        pro_dataset: training feature rows.
        end_dataset: training target values.
        txt_dataset: test feature rows to predict.

    ``Linear_Creat`` fits the weights ``self.oS.W`` and then calls ``Verify``,
    which prints the fit on the training data and predicts the test rows.
    """

    def __init__(self, pro_dataset, end_dataset, txt_dataset):
        self.oS = dataset(pro_dataset, end_dataset)
        # np.asmatrix is used instead of np.mat, which NumPy 2.0 removed.
        self.xT = np.asmatrix(txt_dataset)
        self.txt_pred = None  # test-set predictions, filled in by Verify

    def Verify(self):
        """Print per-sample predictions and the score; predict the test rows.

        For every training sample a line ``index, prediction, target`` is
        printed (values rounded to 5 decimals). Test-set predictions are
        stored in ``self.txt_pred`` (``None`` if they cannot be computed).

        Returns:
            The score computed by ``Cacl_Score`` from the sum of squared
            training errors.
        """
        weights = self.oS.W
        # Flatten to plain 1-D arrays so each element is a real scalar;
        # float() on a 1x1 matrix is deprecated in recent NumPy.
        fitted = np.asarray(np.dot(self.oS.X, weights)).ravel()
        actual = np.asarray(self.oS.Y).ravel()
        for i, (yi, target) in enumerate(zip(fitted, actual)):
            print(str(i) + '\t\t' + str(round(float(yi), 5)) + '\t\t' + str(round(float(target), 5)))
        squared_error_sum = float(np.sum((fitted - actual) ** 2))
        Score = self.Cacl_Score(squared_error_sum)

        # Predicting the test rows is best-effort: a shape mismatch (e.g. an
        # empty test file) is reported instead of being silently swallowed.
        self.txt_pred = None
        try:
            self.txt_pred = np.dot(self.xT, weights)
        except ValueError as e:
            print('Error:', e)

        # NOTE: the label says "rmse", but the value printed is the score
        # 1 / (1 + RMSE) returned by Cacl_Score.
        print("The rmse is:", round(float(Score), 3))
        return Score

    def Linear_Creat(self):
        """Fit the weights with the solver that suits the data, then verify."""
        if self.Is_full() == 1:
            self.Full_mat()
        else:
            self.Not_full_mat()
        self.Verify()

    # Decide whether the feature matrix has full column rank
    def Is_full(self):
        """Return 1 if X has full column rank (X^T X is invertible), else 0."""
        n_features = self.oS.X.shape[1]
        return 1 if np.linalg.matrix_rank(self.oS.X) == n_features else 0

    def Linear_W(self):
        """Placeholder; not implemented."""
        pass

    def Full_mat(self):
        """Solve the normal equations (X^T X) W = X^T Y for W.

        ``np.linalg.solve`` is used rather than forming the explicit inverse;
        it yields the same solution with better numerical behaviour.
        """
        X, Y = self.oS.X, self.oS.Y
        self.oS.W = np.asmatrix(np.linalg.solve(np.dot(X.T, X), np.dot(X.T, Y)))

    def Not_full_mat(self):
        """Fit W with the pseudo-inverse when X^T X is singular.

        ``pinv(X) * Y`` is the minimum-norm least-squares solution, so a
        rank-deficient X still produces usable weights.
        """
        self.oS.W = np.asmatrix(np.dot(np.linalg.pinv(self.oS.X), self.oS.Y))

    def Cacl_Score(self, RMSE):
        """Turn a sum of squared errors into a score in (0, 1].

        Args:
            RMSE: the *sum* of squared errors over the ``self.oS.m`` samples
                (despite the name, it is not yet a root-mean-square value).

        Returns:
            ``1 / (1 + sqrt(RMSE / m))``; 1.0 means a perfect fit.
        """
        # Computed without in-place division so the argument is not mutated.
        root_mean_sq = np.sqrt(RMSE / self.oS.m)
        Score = 1 / (1 + root_mean_sq)
        return Score
if __name__ == '__main__':
    # Time the whole run: load the data, fit the model, print the results.
    began_at = t.time()
    train_rows, train_targets, test_rows = Lead_dataset()
    model = Linear(train_rows, train_targets, test_rows)
    model.Linear_Creat()
    finished_at = t.time()
    print("Time:", finished_at - began_at)