import torch
import matplotlib.pyplot as plt
torch.manual_seed(0)

# Generate 100 evenly spaced points on [0, 2*pi] and split 80/20 into
# train/test sets; the target is sin(x).
X = torch.linspace(0, 2 * torch.pi, 100).reshape(-1, 1)
indices = torch.randperm(X.size(0))
X_train = X[indices[:80]].sort(dim=0)[0]
X_test = X[indices[80:]].sort(dim=0)[0]
y_train = torch.sin(X_train)
y_test = torch.sin(X_test)

# Standardize inputs using training-set statistics only.
X_train_mean = X_train.mean()
X_train_std = X_train.std()
X_train = (X_train - X_train_mean) / X_train_std
X_test = (X_test - X_train_mean) / X_train_std
# Degree-4 polynomial coefficients; gradients are computed by hand below,
# so autograd is not needed on the weights (requires_grad defaults to False).
weights = torch.randn(5) * 0.01
learning_rate = 0.01
num_iterations = 10000
def polynomial(x, weights):
    """Evaluate w0 + w1*x + w2*x^2 + w3*x^3 + w4*x^4 elementwise."""
    return (
        weights[4] * x ** 4 +
        weights[3] * x ** 3 +
        weights[2] * x ** 2 +
        weights[1] * x +
        weights[0]
    )
def loss_function(y_pred, y_true):
    """Mean squared error."""
    return torch.mean((y_pred - y_true) ** 2)
def compute_gradient(X, y, weights):
    """Analytic MSE gradient with respect to each polynomial coefficient."""
    y_pred = polynomial(X, weights)
    error = y_pred - y
    grad_w0 = (2 / len(y)) * torch.sum(error)
    grad_w1 = (2 / len(y)) * torch.sum(error * X)
    grad_w2 = (2 / len(y)) * torch.sum(error * X ** 2)
    grad_w3 = (2 / len(y)) * torch.sum(error * X ** 3)
    grad_w4 = (2 / len(y)) * torch.sum(error * X ** 4)
    # torch.stack keeps these as tensors; torch.tensor([...]) on a list of
    # 0-dim tensors copies the values and warns in recent PyTorch versions.
    return torch.stack([grad_w0, grad_w1, grad_w2, grad_w3, grad_w4])
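# Optional sanity check (a sketch, not part of the original script): the
# analytic gradient above can be cross-checked against autograd on a copy
# of the weights.
w_check = weights.clone().requires_grad_(True)
loss_function(polynomial(X_train, w_check), y_train).backward()
assert torch.allclose(w_check.grad, compute_gradient(X_train, y_train, weights), atol=1e-5)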
for i in range(num_iterations):
    y_pred = polynomial(X_train, weights)
    loss = loss_function(y_pred, y_train)
    grads = compute_gradient(X_train, y_train, weights)
    # Plain gradient-descent step; torch.no_grad() is unnecessary because
    # the weights never track gradients.
    weights -= learning_rate * grads
    if i % 1000 == 0:
        print(f"Iteration {i}: Loss = {loss.item()}")
print(f"Final parameters: {weights.data}")
y_pred_train = polynomial(X_train, weights).detach()
y_pred_test = polynomial(X_test, weights).detach()
X_train = X_train * X_train_std + X_train_mean
X_test = X_test * X_train_std + X_train_mean
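# The matplotlib import at the top suggests a plot was intended; this is a
# minimal sketch of one way to visualize the fit, not code from the
# original script.
plt.plot(X_train, y_train, label="sin(x) (train)")
plt.plot(X_train, y_pred_train, label="degree-4 fit (train)")
plt.scatter(X_test, y_pred_test, marker="x", label="fit (test)")
plt.legend()
plt.show()
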
import torch
import torch.nn.functional as F
def softmax(x):
    """
    Compute softmax along dim 1.

    Softmax turns a batch of scores into probability distributions.
    Subtracting the row-wise maximum keeps exp() from overflowing.
    """
    exp_x = torch.exp(x - x.max(dim=1, keepdim=True)[0])
    return exp_x / exp_x.sum(dim=1, keepdim=True)
def cross_entropy_loss(y_true, y_pred):
    """
    Compute the cross-entropy loss.

    y_true: PyTorch tensor of true label indices.
    y_pred: PyTorch tensor of logits (predictions before softmax).
    """
    probs = softmax(y_pred)
    y_true_one_hot = F.one_hot(y_true, num_classes=y_pred.size(1)).float()
    # Sum the negative log-probability of each true class, then average
    # over the batch.
    loss = -torch.sum(y_true_one_hot * torch.log(probs))
    return loss / y_true.size(0)
y_true = torch.tensor([0, 1, 2])
y_pred = torch.tensor([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1], [0.2, 0.3, 0.5]])
loss = cross_entropy_loss(y_true, y_pred)
print("交叉熵损失:", loss.item())
import torch
import torch.nn as nn
import torch.nn.functional as F
class MultiHeadSelfAttention(nn.Module):
    def __init__(self, num_heads, input_dim, hidden_dim):
        super().__init__()
        self.num_heads = num_heads
        self.hidden_dim = hidden_dim
        self.input_dim = input_dim
        # Project the input into per-head queries, keys, and values, then
        # project the concatenated heads back to input_dim.
        self.W_q = nn.Linear(input_dim, num_heads * hidden_dim)
        self.W_k = nn.Linear(input_dim, num_heads * hidden_dim)
        self.W_v = nn.Linear(input_dim, num_heads * hidden_dim)
        self.W_o = nn.Linear(num_heads * hidden_dim, input_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        bs, seq_len, _ = x.size()
        # (bs, seq_len, heads, dim) -> (bs, heads, seq_len, dim)
        q = self.W_q(x).reshape(bs, seq_len, self.num_heads, self.hidden_dim).permute(0, 2, 1, 3)
        k = self.W_k(x).reshape(bs, seq_len, self.num_heads, self.hidden_dim).permute(0, 2, 1, 3)
        v = self.W_v(x).reshape(bs, seq_len, self.num_heads, self.hidden_dim).permute(0, 2, 1, 3)
        # Scaled dot-product attention per head.
        att_out = F.softmax(q @ k.transpose(2, 3) / (self.hidden_dim ** 0.5), dim=-1) @ v
        # Merge heads: (bs, heads, seq_len, dim) -> (bs, seq_len, heads*dim)
        att_out = att_out.permute(0, 2, 1, 3).reshape(bs, seq_len, -1)
        return self.W_o(att_out)
x = torch.randn(32, 10, 512)  # (batch, seq_len, input_dim); avoids shadowing the builtin input
attn = MultiHeadSelfAttention(8, 512, 64)
print(attn(x).shape)  # expected: torch.Size([32, 10, 512])
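# Cross-check of the core computation (a sketch; F.scaled_dot_product_attention
# requires PyTorch >= 2.0). On random per-head tensors of shape
# (batch, heads, seq, dim), softmax(q @ k^T / sqrt(d)) @ v should match it.
q = torch.randn(2, 8, 10, 64)
k = torch.randn(2, 8, 10, 64)
v = torch.randn(2, 8, 10, 64)
manual = F.softmax(q @ k.transpose(2, 3) / (64 ** 0.5), dim=-1) @ v
builtin = F.scaled_dot_product_attention(q, k, v)
print(torch.allclose(manual, builtin, atol=1e-5))  # expected: True
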
import torch
import torch.nn.functional as F
def binary_cross_entropy_loss(y_true, y_pred):
    """
    y_true: PyTorch tensor of true labels (0 or 1).
    y_pred: PyTorch tensor of predicted logits (values before sigmoid).
    """
    y_pred = torch.sigmoid(y_pred)
    loss = -torch.mean(y_true * torch.log(y_pred) + (1 - y_true) * torch.log(1 - y_pred))
    return loss
y_true = torch.tensor([0, 1, 1], dtype=torch.float32)
y_pred = torch.tensor([0.2, 0.8, 0.6], dtype=torch.float32)
loss = binary_cross_entropy_loss(y_true, y_pred)
print("二分类交叉熵损失:", loss.item())