1.引入模块
import numpy as np
from sklearn.datasets import load_iris
这两个模块需要先从终端安装:输入 pip install numpy 命令安装 numpy,输入 pip install scikit-learn 命令安装 scikit-learn。随后从 scikit-learn 库的 datasets 模块中导入 load_iris 函数,该函数用于加载鸢尾花数据集。
2.引入数据,打乱数据,并将数据划分为训练集、验证集、测试集
# Load the iris dataset bundle and pull out the feature matrix and labels.
iris = load_iris()
X = iris.data
y = iris.target
def train_val_test_split(X, y, train_ratio=0.8, val_ratio=0.1, test_ratio=0.1):
    """Randomly split (X, y) into train / validation / test subsets.

    Args:
        X: feature array, shape (n_samples, n_features).
        y: label array, shape (n_samples,).
        train_ratio: fraction of samples assigned to the training set.
        val_ratio: fraction of samples assigned to the validation set.
        test_ratio: kept for interface symmetry; the test set simply
            receives every sample left after the train and val splits.

    Returns:
        Tuple (X_train, X_val, X_test, y_train, y_val, y_test).
    """
    total_size = len(X)
    train_size = int(total_size * train_ratio)
    val_size = int(total_size * val_ratio)
    # One shuffled index list drives all three splits, so they are disjoint
    # and together cover every sample.
    indices = list(range(total_size))
    np.random.shuffle(indices)
    train_indices = indices[:train_size]
    val_indices = indices[train_size:train_size + val_size]
    test_indices = indices[train_size + val_size:]
    return (X[train_indices], X[val_indices], X[test_indices],
            y[train_indices], y[val_indices], y[test_indices])
利用 np.random.shuffle(indices) 得到一个顺序随机的索引列表,再利用该列表索引出随机划分的数据集
3.编写逻辑回归模型类
class LogisticRegression:
    """Multinomial (softmax) logistic regression trained by batch gradient descent.

    Attributes:
        features_num: number of input features per sample.
        class_num: number of target classes.
        rate: learning rate used by gradient descent.
        num: number of gradient-descent iterations.
        weights: weight matrix, shape (features_num, class_num).
        bias: bias vector, shape (class_num,).
    """

    def __init__(self, features_num, class_num, rate, num):
        self.features_num = features_num
        self.class_num = class_num
        self.rate = rate
        self.num = num
        # Random init breaks symmetry between the class columns.
        self.weights = np.random.randn(self.features_num, self.class_num)
        self.bias = np.zeros(self.class_num)

    def Softmax(self, z):
        """Row-wise softmax; subtracting each row's max avoids exp overflow."""
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        z_sum = np.sum(exp_z, axis=1, keepdims=True)
        return exp_z / z_sum

    def loss(self, X, y):
        """Mean cross-entropy loss.

        Args:
            X: feature matrix, shape (m, features_num).
            y: one-hot encoded labels, shape (m, class_num).
        """
        m = X.shape[0]
        pro = self.Softmax(np.dot(X, self.weights) + self.bias)
        # Clip probabilities away from 0 so log() never yields -inf/NaN.
        log_pro = np.log(np.clip(pro, 1e-15, 1.0))
        return -np.sum(log_pro * y) / m

    def descore(self, X, y):
        """Train with full-batch gradient descent on integer labels y."""
        m = X.shape[0]
        # 将 y 转换为独热编码格式 -> one-hot encode the integer labels.
        y_one_hot = np.zeros((m, self.class_num))
        y_one_hot[np.arange(m), y] = 1
        for i in range(self.num):
            prob = self.Softmax(np.dot(X, self.weights) + self.bias)
            # Gradient of cross-entropy w.r.t. logits is (prob - one_hot) / m.
            gradient = (prob - y_one_hot) / m
            dw = np.dot(X.T, gradient)
            db = np.sum(gradient, axis=0)
            self.weights -= self.rate * dw
            self.bias -= self.rate * db
            if i % 2000 == 0:
                print(f"第 {i} 次迭代: 损失 {self.loss(X, y_one_hot)}")

    def predict(self, X):
        """Return the most probable class index for each row of X."""
        pros = self.Softmax(np.dot(X, self.weights) + self.bias)
        return np.argmax(pros, axis=1)

    def accuracy(self, X, y):
        """Fraction of samples whose predicted class equals the label in y."""
        predictions = self.predict(X)
        return np.mean(predictions == y)
模型所利用的数学公式为 softmax 函数与交叉熵损失,并通过上述类中的方法将其实现。
4.调用五次并取平均准确率
# Run the full split / train / evaluate pipeline several times and
# report the mean validation and test accuracies.
num_experiments = 5
val_scores = []
test_scores = []
for exp in range(num_experiments):
    print(f"Experiment {exp + 1}/{num_experiments}:")
    splits = train_val_test_split(X, y, train_ratio=0.8, val_ratio=0.1, test_ratio=0.1)
    X_train, X_val, X_test, y_train, y_val, y_test = splits
    model = LogisticRegression(features_num=X_train.shape[1], class_num=3, rate=0.1, num=8000)
    model.descore(X_train, y_train)
    val_accuracy = model.accuracy(X_val, y_val)
    print(f"Validation Accuracy: {val_accuracy}")
    val_scores.append(val_accuracy)
    test_accuracy = model.accuracy(X_test, y_test)
    print(f"Test Accuracy: {test_accuracy}")
    test_scores.append(test_accuracy)
    print()
avg_val_accuracy = sum(val_scores) / num_experiments
avg_test_accuracy = sum(test_scores) / num_experiments
print(f"Avg Validation Accuracy over {num_experiments} experiments: {avg_val_accuracy}")
print(f"Avg Test Accuracy over {num_experiments} experiments: {avg_test_accuracy}")
5.完整代码及输出结果
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# Load the iris dataset bundle and pull out the feature matrix and labels.
iris = load_iris()
X = iris.data
y = iris.target
def train_val_test_split(X, y, train_ratio=0.8, val_ratio=0.1, test_ratio=0.1):
    """Randomly split (X, y) into train / validation / test subsets.

    Args:
        X: feature array, shape (n_samples, n_features).
        y: label array, shape (n_samples,).
        train_ratio: fraction of samples assigned to the training set.
        val_ratio: fraction of samples assigned to the validation set.
        test_ratio: kept for interface symmetry; the test set simply
            receives every sample left after the train and val splits.

    Returns:
        Tuple (X_train, X_val, X_test, y_train, y_val, y_test).
    """
    total_size = len(X)
    train_size = int(total_size * train_ratio)
    val_size = int(total_size * val_ratio)
    # One shuffled index list drives all three splits, so they are disjoint
    # and together cover every sample.
    indices = list(range(total_size))
    np.random.shuffle(indices)
    train_indices = indices[:train_size]
    val_indices = indices[train_size:train_size + val_size]
    test_indices = indices[train_size + val_size:]
    return (X[train_indices], X[val_indices], X[test_indices],
            y[train_indices], y[val_indices], y[test_indices])
class LogisticRegression:
    """Multinomial (softmax) logistic regression trained by batch gradient descent.

    Attributes:
        features_num: number of input features per sample.
        class_num: number of target classes.
        rate: learning rate used by gradient descent.
        num: number of gradient-descent iterations.
        weights: weight matrix, shape (features_num, class_num).
        bias: bias vector, shape (class_num,).
    """

    def __init__(self, features_num, class_num, rate, num):
        self.features_num = features_num
        self.class_num = class_num
        self.rate = rate
        self.num = num
        # Random init breaks symmetry between the class columns.
        self.weights = np.random.randn(self.features_num, self.class_num)
        self.bias = np.zeros(self.class_num)

    def Softmax(self, z):
        """Row-wise softmax; subtracting each row's max avoids exp overflow."""
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        z_sum = np.sum(exp_z, axis=1, keepdims=True)
        return exp_z / z_sum

    def loss(self, X, y):
        """Mean cross-entropy loss.

        Args:
            X: feature matrix, shape (m, features_num).
            y: one-hot encoded labels, shape (m, class_num).
        """
        m = X.shape[0]
        pro = self.Softmax(np.dot(X, self.weights) + self.bias)
        # Clip probabilities away from 0 so log() never yields -inf/NaN.
        log_pro = np.log(np.clip(pro, 1e-15, 1.0))
        return -np.sum(log_pro * y) / m

    def descore(self, X, y):
        """Train with full-batch gradient descent on integer labels y."""
        m = X.shape[0]
        # 将 y 转换为独热编码格式 -> one-hot encode the integer labels.
        y_one_hot = np.zeros((m, self.class_num))
        y_one_hot[np.arange(m), y] = 1
        for i in range(self.num):
            prob = self.Softmax(np.dot(X, self.weights) + self.bias)
            # Gradient of cross-entropy w.r.t. logits is (prob - one_hot) / m.
            gradient = (prob - y_one_hot) / m
            dw = np.dot(X.T, gradient)
            db = np.sum(gradient, axis=0)
            self.weights -= self.rate * dw
            self.bias -= self.rate * db
            if i % 2000 == 0:
                print(f"第 {i} 次迭代: 损失 {self.loss(X, y_one_hot)}")

    def predict(self, X):
        """Return the most probable class index for each row of X."""
        pros = self.Softmax(np.dot(X, self.weights) + self.bias)
        return np.argmax(pros, axis=1)

    def accuracy(self, X, y):
        """Fraction of samples whose predicted class equals the label in y."""
        predictions = self.predict(X)
        return np.mean(predictions == y)
# Run the full split / train / evaluate pipeline several times and
# report the mean validation and test accuracies.
num_experiments = 5
val_scores = []
test_scores = []
for exp in range(num_experiments):
    print(f"Experiment {exp + 1}/{num_experiments}:")
    splits = train_val_test_split(X, y, train_ratio=0.8, val_ratio=0.1, test_ratio=0.1)
    X_train, X_val, X_test, y_train, y_val, y_test = splits
    model = LogisticRegression(features_num=X_train.shape[1], class_num=3, rate=0.1, num=8000)
    model.descore(X_train, y_train)
    val_accuracy = model.accuracy(X_val, y_val)
    print(f"Validation Accuracy: {val_accuracy}")
    val_scores.append(val_accuracy)
    test_accuracy = model.accuracy(X_test, y_test)
    print(f"Test Accuracy: {test_accuracy}")
    test_scores.append(test_accuracy)
    print()
avg_val_accuracy = sum(val_scores) / num_experiments
avg_test_accuracy = sum(test_scores) / num_experiments
print(f"Avg Validation Accuracy over {num_experiments} experiments: {avg_val_accuracy}")
print(f"Avg Test Accuracy over {num_experiments} experiments: {avg_test_accuracy}")