假设数据集中特征集合X是一个300×10的矩阵,300代表样本数,10代表特征数
目标集合Y是一个300×1的矩阵
特征集合到目标集合的转换经历了一个权重矩阵W(10*1)
即是一个线性回归,目标函数为y = wx
对其进行梯度下降法,使损失函数变小,不断拟合
import numpy as np
def get_data(file_name):
    """Read a whitespace-separated numeric matrix from *file_name*.

    Each non-blank line becomes one row; tokens are parsed as floats.
    Returns a 2-D numpy array of shape (n_lines, n_tokens).
    """
    with open(file_name) as fil:
        rows = []
        for line in fil:
            # Skip blank lines so they don't produce empty (ragged) rows.
            if not line.strip():
                continue
            # float() instead of eval(): eval() executes arbitrary code
            # from the data file, which is unsafe and much slower.
            rows.append([float(tok) for tok in line.split()])
    return np.array(rows)
def get_weight():
    """Prompt the user for initial weights, one number per token.

    Returns a column vector as a list of single-element lists,
    e.g. input "0.1 0.2" -> [[0.1], [0.2]].
    """
    W_input = input("请输入初始权重数组:")
    # float() instead of eval(): user input should be parsed, never executed.
    return [[float(tok)] for tok in W_input.split()]
def train_test_sep(X, Y, train_size):
    """Split X and Y into train/test partitions by row order.

    train_size is the fraction of rows assigned to the training split.
    Returns (X_train, X_test, Y_train, Y_test).
    """
    cut = int(X.shape[0] * train_size)  # number of training rows
    return (X[:cut], X[cut:], Y[:cut], Y[cut:])
def model(X, W):
    """Linear model: predict Y = X @ W."""
    return X @ W
def evalution(Y_pred, Y):
    """Print and return the sum-of-squared-errors between Y_pred and Y.

    Returning the value (backward-compatible: callers that ignore the
    result are unaffected) lets tests and other code reuse the loss.
    """
    loss = np.sum((Y - Y_pred) ** 2)
    print(loss)
    return loss
def opitimizer(Y, W, X, lr, iter_num):
    """Full-batch gradient descent on loss = ||Y - X @ W||^2.

    Y: (n, 1) targets; W: (d, 1) initial weights (list or array);
    X: (n, d) features; lr: step size; iter_num: iteration count.
    Prints the loss each iteration and returns the optimized weights.
    """
    # Work on a float copy so the caller's initial weights are not
    # mutated in place by the -= updates.
    W = np.array(W, dtype=float)
    for i in range(iter_num):
        residual = Y - X @ W
        grad = -2.0 * X.T @ residual  # d(loss)/dW
        W -= lr * grad
        print(i, np.sum((Y - X @ W) ** 2))
    return W
if __name__ == "__main__":
    X = get_data("x.txt")        # feature matrix
    Y = get_data("y.txt")        # target column
    W = get_weight()             # user-supplied initial weights
    W_right = get_data("w.txt")  # reference (true) weights
    learning_rate = 100
    iter_num = 600
    train_size = 0.9
    # Split into train/test partitions.
    X_train, X_test, Y_train, Y_test = train_test_sep(X, Y, train_size)
    print("###Training:---")
    print("Loss before opitimization:", end="")  # loss before gradient descent
    evalution(model(X_train, W), Y_train)
    print("###Opitimizing:---")
    W_after_opitimizer = opitimizer(Y_train, W, X_train, learning_rate, iter_num)
    print("weight after training:\n", W_after_opitimizer)
    print("weight in answer:\n", W_right)
    print("###Testing:---")
    evalution(model(X_test, W_after_opitimizer), Y_test)
# 1 2 3 4 5 6 7 8 9 10
考虑将目标函数改为 y = w1*x + w2*x^2 + w3*x^3
效果并不好。。。
也没有发生过拟合的现象,在训练集中效果不佳
在测试集中效果更烂。。。
import numpy as np
def get_data(file_name):
    """Read a whitespace-separated numeric matrix from *file_name*.

    Each non-blank line becomes one row; tokens are parsed as floats.
    Returns a 2-D numpy array of shape (n_lines, n_tokens).
    """
    with open(file_name) as fil:
        rows = []
        for line in fil:
            # Skip blank lines so they don't produce empty (ragged) rows.
            if not line.strip():
                continue
            # float() instead of eval(): eval() executes arbitrary code
            # from the data file, which is unsafe and much slower.
            rows.append([float(tok) for tok in line.split()])
    return np.array(rows)
def get_weight(n):
    """Prompt the user for initial weight vectors.

    n == 1: return one column vector (list of single-element lists)
            for the linear model.
    n == 3: return three column vectors (W1, W2, W3) for the cubic model.
    Any other n returns None.
    """
    def read_vector(prompt):
        # One prompt -> one column vector. float() instead of eval():
        # user input should be parsed, never executed.
        return [[float(tok)] for tok in input(prompt).split()]

    if n == 1:
        return read_vector("请输入初始权重数组:")
    if n == 3:
        # Bug fix: the third prompt previously said "W2" instead of "W3".
        return (read_vector("请输入W1初始权重数组:"),
                read_vector("请输入W2初始权重数组:"),
                read_vector("请输入W3初始权重数组:"))
def train_test_sep(X, Y, train_size):
    """Split X and Y into train/test partitions by row order.

    train_size is the fraction of rows assigned to the training split.
    Returns (X_train, X_test, Y_train, Y_test).
    """
    n_train = int(len(X) * train_size)  # training row count
    X_train, X_test = X[:n_train], X[n_train:]
    Y_train, Y_test = Y[:n_train], Y[n_train:]
    return (X_train, X_test, Y_train, Y_test)
def model(X, W):
    """Linear model: predict Y = X @ W."""
    return np.matmul(X, W)
def model2(X, W1, W2, W3):
    """Cubic model: Y = X@W1 + (X*X)@W2 + (X*X*X)@W3.

    The powers are elementwise, so each feature contributes linear,
    quadratic and cubic terms with its own weight.
    """
    X2 = X * X
    X3 = X2 * X
    return X @ W1 + X2 @ W2 + X3 @ W3
def evalution(Y_pred, Y):
    """Print and return the sum-of-squared-errors between Y_pred and Y.

    Returning the value (backward-compatible: callers that ignore the
    result are unaffected) lets tests and other code reuse the loss.
    """
    loss = np.sum((Y - Y_pred) ** 2)
    print(loss)
    return loss
def opitimizer(Y, W, X, lr, iter_num):
    """Full-batch gradient descent on loss = ||Y - X @ W||^2.

    Y: (n, 1) targets; W: (d, 1) initial weights (list or array);
    X: (n, d) features; lr: step size; iter_num: iteration count.
    Prints the loss each iteration and returns the optimized weights.
    """
    # Work on a float copy so the caller's initial weights are not
    # mutated in place by the -= updates.
    W = np.array(W, dtype=float)
    for i in range(iter_num):
        residual = Y - X @ W
        grad = -2.0 * X.T @ residual  # d(loss)/dW
        W -= lr * grad
        print(i, np.sum((Y - X @ W) ** 2))
    return W
def opitimizer2(Y, W1, W2, W3, X, lr, iter_num):
    """Gradient descent for the cubic model Y ≈ X@W1 + (X*X)@W2 + (X*X*X)@W3.

    Y: (n, 1) targets; W1, W2, W3: (d, 1) initial weights (lists or arrays);
    X: (n, d) features. Prints the loss each iteration and returns the
    optimized (W1, W2, W3) as float arrays.
    """
    # Float copies: don't mutate the caller's initial weights in place.
    W1 = np.array(W1, dtype=float)
    W2 = np.array(W2, dtype=float)
    W3 = np.array(W3, dtype=float)
    # The elementwise powers are loop-invariant: compute them once
    # instead of rebuilding X*X and X*X*X on every iteration.
    X2 = X * X
    X3 = X2 * X
    for i in range(iter_num):
        residual = Y - X @ W1 - X2 @ W2 - X3 @ W3
        # Each weight block has the same gradient form: -2 * basis.T @ residual.
        W1 -= lr * (-2.0 * X.T @ residual)
        W2 -= lr * (-2.0 * X2.T @ residual)
        W3 -= lr * (-2.0 * X3.T @ residual)
        pred = X @ W1 + X2 @ W2 + X3 @ W3
        print(i, np.sum((Y - pred) ** 2))
    return W1, W2, W3
if __name__ == "__main__":
    X = get_data("x.txt")        # feature matrix
    Y = get_data("y.txt")        # target column
    W1, W2, W3 = get_weight(3)   # initial weights for the cubic model
    W_right = get_data("w.txt")  # reference (true) weights
    learning_rate = 1e-4
    iter_num = 100000
    train_size = 0.9
    # Split into train/test partitions.
    X_train, X_test, Y_train, Y_test = train_test_sep(X, Y, train_size)
    print("###Training:---")
    print("Loss before opitimization:", end="")  # loss before gradient descent
    evalution(model2(X_train, W1, W2, W3), Y_train)
    print("###Opitimizing:---")
    W1_after, W2_after, W3_after = opitimizer2(
        Y_train, W1, W2, W3, X_train, learning_rate, iter_num)
    print("W1 after training:\n", W1_after)
    print("W2 after training:\n", W2_after)
    print("W3 after training:\n", W3_after)
    print("###Testing:---")
    # Bug fix: the "Testing" loss was previously computed on the TRAINING
    # split with the pre-training weight objects, so it never measured
    # generalization. Evaluate the trained weights on the held-out split.
    evalution(model2(X_test, W1_after, W2_after, W3_after), Y_test)
# 1 2 3 4 5 6 7 8 9 10
# 1 2 3 4 5 6 7 8 9 10
# 1 2 3 4 5 6 7 8 9 10
假设发生了过拟合。。
在通读L1 L2范式的详解以及Scikit-learn上基于L1 L2范式正则化的实例大佬文章之后
对鸢尾花数据集进行L1范式正则化:
解释:
在线性回归或者逻辑回归(就是分类啊…)中,目标函数一般可以写作 y = w1x1 + w2x2 + w3x3 + w4x4 + w0
coef就代表着 w1 到 w4这些特征权重
intercept就是w0,是偏移量,是bias
最终的结果[0 -0. 0.16129057 0.32571651]
也说明了Lasso范式对于特征选择很有帮助
也证明了Lasso的稀疏性
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
import numpy as np
import sys
# Show full arrays instead of numpy's "..." truncation.
np.set_printoptions(threshold=sys.maxsize)
iris = load_iris()
scaler = StandardScaler()
print(iris)
print("-" * 30)
# Standardize features first: the L1 penalty is scale-sensitive, so
# Lasso coefficients are only comparable when every feature has unit
# variance.
X = scaler.fit_transform(iris["data"])
print(X)
print("-" * 30)
Y = iris["target"]
print(Y)
print("-" * 30)
names = iris["feature_names"]
print(names)
print("-" * 30)
lasso = Lasso(alpha=.3)
# Fix: fit once and reuse the fitted estimator. The original called
# .fit() twice (once per printed attribute), re-running the solver.
lasso.fit(X, Y)
print(lasso.coef_)
print(lasso.intercept_)
# print("Lasso model: ", pretty_print_linear(lasso.coef_, names, sort=True))