import learn2learn as l2l
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from sklearn.calibration import calibration_curve
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import roc_curve, auc, brier_score_loss
from sklearn.model_selection import train_test_split
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset
# Data preparation: load the heart-disease table from disk.
data = pd.read_csv('heart_disease_data.csv')
# Separate the label column from the predictor columns.
y = data['target']
X = data.drop(columns='target')
# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
# Model definition.
class SimpleModel(nn.Module):
    """Two-layer feed-forward binary classifier.

    The network is ``Linear -> ReLU -> Linear -> Sigmoid`` and emits one
    value in (0, 1) per input row, suitable for ``nn.BCELoss``.

    Args:
        input_dim: Number of input features. Defaults to 13, the width
            the original script assumed for its feature table.
        hidden_dim: Width of the hidden layer. Defaults to 64.
    """

    def __init__(self, input_dim=13, hidden_dim=64):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid outputs with a trailing dimension of size 1."""
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        x = self.sigmoid(x)
        return x
# Initialise the model, optimizer and loss function.
model = SimpleModel()
# learn2learn has no `optimizers.MAML`; its MAML implementation is the module
# wrapper `l2l.algorithms.MAML` (used through clone()/adapt()), which is not an
# optimizer and has no step(). The training loop in this script performs plain
# minibatch updates via zero_grad()/backward()/step(), so it needs a regular
# torch optimizer. The original learning rate of 0.01 is kept.
meta_optimizer = optim.Adam(model.parameters(), lr=0.01)
# BCELoss expects probabilities in [0, 1], which the model's sigmoid provides.
criterion = nn.BCELoss()
# Wrap the training split in a dataset and a shuffling minibatch loader.
train_features = torch.tensor(X_train.values, dtype=torch.float32)
train_labels = torch.tensor(y_train.values, dtype=torch.float32)
dataset = TensorDataset(train_features, train_labels)
data_loader = DataLoader(dataset, batch_size=32, shuffle=True)
# Training function.
def train_meta(model, data_loader, meta_optimizer, num_epochs=10, criterion=None):
    """Train ``model`` with minibatch gradient descent.

    Args:
        model: Module mapping a feature batch to one probability per row.
        data_loader: Iterable yielding ``(features, labels)`` batches.
        meta_optimizer: Optimizer exposing ``zero_grad()`` and ``step()``
            over the parameters of ``model``.
        num_epochs: Number of full passes over ``data_loader``.
        criterion: Loss called as ``criterion(prediction, target)``.
            Defaults to ``nn.BCELoss()``.

    Returns:
        None. ``model`` is updated in place and left in training mode.
    """
    if criterion is None:
        criterion = nn.BCELoss()
    model.train()
    for epoch in range(num_epochs):
        for X_batch, y_batch in data_loader:
            # as_tensor reuses tensors that already have the right dtype,
            # avoiding the copy-construct warning torch.tensor() emits.
            X_batch = torch.as_tensor(X_batch, dtype=torch.float32)
            y_batch = torch.as_tensor(y_batch, dtype=torch.float32)
            meta_optimizer.zero_grad()
            # Reshape to the target's shape instead of a bare squeeze(): a
            # batch holding a single sample would otherwise collapse to a
            # 0-d tensor and BCELoss rejects mismatched shapes.
            y_pred = model(X_batch).reshape(y_batch.shape)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            # Exactly one update per batch; a second step() would re-apply
            # the same, already-used gradients.
            meta_optimizer.step()
        print(f'Epoch {epoch+1}/{num_epochs} completed')
# Fit the model on the training loader.
train_meta(model, data_loader, meta_optimizer)
# Score the held-out rows: switch to inference mode and skip gradient
# tracking, since only the forward pass is needed.
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
model.eval()
with torch.no_grad():
    y_prob = model(X_test_tensor).squeeze().numpy()
# Convert probabilities into hard 0/1 labels with a 0.5 cut-off.
y_pred = np.greater(y_prob, 0.5).astype(int)
# Classification metrics: the first four use the hard predictions, while
# AUC-ROC uses the predicted probabilities.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_prob)
print(
    f'Accuracy: {accuracy:.2f}',
    f'Precision: {precision:.2f}',
    f'Recall: {recall:.2f}',
    f'F1 Score: {f1:.2f}',
    f'AUC-ROC: {roc_auc:.2f}',
    sep='\n',
)
# Data for the evaluation plots.
# ROC curve: false/true positive rates, plus the area under that curve
# (rebinds roc_auc for use in the plot legend).
fpr, tpr, _ = roc_curve(y_test, y_prob)
roc_auc = auc(fpr, tpr)
# Decision-curve analysis (DCA).
def dca_curve(y_true, y_prob, thresholds=None):
    """Compute the net benefit of the model at each probability threshold.

    For a threshold ``t`` a sample is flagged when ``y_prob >= t`` and

        net_benefit = (TP - FP * t / (1 - t)) / n_samples

    At ``t >= 1`` the odds ``t / (1 - t)`` are unbounded, so the limiting
    value is returned instead of dividing by zero: ``TP / n_samples`` when
    there are no false positives, ``-inf`` otherwise.

    Args:
        y_true: Binary labels (0/1); any array-like, including a pandas
            Series (its index is ignored, values are used positionally).
        y_prob: Predicted probabilities, same length as ``y_true``.
        thresholds: Iterable of thresholds. Defaults to ten evenly spaced
            values from 0 to 1 inclusive.

    Returns:
        A list of floats, one net-benefit value per threshold.

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    if thresholds is None:
        thresholds = np.linspace(0, 1, 10)
    labels = np.asarray(y_true).ravel()
    scores = np.asarray(y_prob).ravel()
    if labels.size == 0 or labels.size != scores.size:
        raise ValueError('y_true and y_prob must be non-empty and of equal length')
    n_samples = labels.size
    is_positive = labels == 1
    is_negative = labels == 0

    dca_results = []
    for threshold in thresholds:
        flagged = scores >= threshold
        tp = int(np.count_nonzero(flagged & is_positive))
        fp = int(np.count_nonzero(flagged & is_negative))
        if fp == 0:
            # No false positives: the penalty is zero whatever the odds are,
            # which also avoids 0 * inf = NaN at threshold 1.
            penalty = 0.0
        elif threshold >= 1:
            penalty = float('inf')
        else:
            penalty = fp * (threshold / (1 - threshold))
        dca_results.append(float((tp - penalty) / n_samples))
    return dca_results
# Evaluate the decision curve on ten evenly spaced thresholds from 0 to 1.
thresholds = np.linspace(0, 1, num=10)
dca_results = dca_curve(y_test, y_prob, thresholds=thresholds)
# Calibration curve: observed positive fraction versus mean predicted
# probability, using ten equal-width probability bins.
prob_true, prob_pred = calibration_curve(
    y_test,
    y_prob,
    n_bins=10,
    strategy='uniform',
)
# Draw the three evaluation curves side by side in one figure.
fig, (roc_ax, dca_ax, calibration_ax) = plt.subplots(1, 3, figsize=(15, 5))

# Left panel: ROC curve with the chance diagonal for reference.
roc_ax.plot(fpr, tpr, color='blue', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
roc_ax.plot([0, 1], [0, 1], color='grey', linestyle='--')
roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('ROC Curve')
roc_ax.legend(loc='lower right')

# Middle panel: decision curve (net benefit per threshold).
dca_ax.plot(thresholds, dca_results, marker='o')
dca_ax.set_xlabel('Threshold')
dca_ax.set_ylabel('Net Benefit')
dca_ax.set_title('DCA Curve')

# Right panel: calibration curve against the perfect-calibration diagonal.
calibration_ax.plot(prob_pred, prob_true, marker='o', label='Calibration curve')
calibration_ax.plot([0, 1], [0, 1], color='grey', linestyle='--')
calibration_ax.set_xlabel('Mean Predicted Probability')
calibration_ax.set_ylabel('Fraction of Positives')
calibration_ax.set_title('Calibration Curve')

fig.tight_layout()
plt.show()
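# Source page title: complete example of heart-disease classification with meta-learning, covering data preparation and model training (described there as using the MAML framework).
# Source page footer: latest recommended article published 2024-10-15 10:20:47.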