# Code adapted from the Pytsk project for study purposes: builds a model with the Python TSK (Takagi-Sugeno-Kang) fuzzy-system toolkit.
import numpy as np #数学计算
import torch.nn as nn#损失函数或神经网络模型的搭建,构建深度学习和神经网络结合的模型
from sklearn.datasets import make_classification#机器学习分类数据集制造
from sklearn.metrics import accuracy_score#计算准确率
from sklearn.model_selection import train_test_split#数据集划分
from sklearn.preprocessing import StandardScaler#数据预处理,标准化
from torch.optim import AdamW#优化器
'''作者自己写的TSK库,主要是计算先验参数以及模型训练'''
from pytsk.gradient_descent.antecedent import AntecedentGMF, antecedent_init_center#模糊集构建和中心值和标准差的计算
from pytsk.gradient_descent.callbacks import EarlyStoppingACC#停止条件
from pytsk.gradient_descent.training import Wrapper#模型训练
from pytsk.gradient_descent.tsk import TSK#基本TSK模糊系统
# --------- Prepare the dataset ---------
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2)  # X: [n_samples, n_features], y: [n_samples]
n_class = len(np.unique(y))  # number of distinct classes in y

# Bug fix: x_train/x_test/y_train/y_test were used below without ever being
# created — the train/test split was missing, so the script crashed with a
# NameError. Split BEFORE fitting the scaler so test data never leaks into
# the preprocessing statistics.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Z-score standardization: fit on the training split only, then apply the
# same transform to the held-out test split.
ss = StandardScaler()
x_train = ss.fit_transform(x_train)
x_test = ss.transform(x_test)
# ---- TSK model hyper-parameters ----
n_rule = 30          # number of fuzzy rules
lr = 0.01            # AdamW learning rate
weight_decay = 1e-8  # L2 penalty for the non-antecedent parameters
consbn = False       # consequent batch-norm flag (defined but not used below)
order = 1            # first-order TSK fuzzy system (linear consequents)
# --------- Antecedent (rule premise) ---------
# Initialize the rule centers/sigmas (k-means clustering by default, per the
# original author's note), then stack: Gaussian membership -> LayerNorm -> ReLU.
init_center = antecedent_init_center(x_train, y_train, n_rule=n_rule)
membership = AntecedentGMF(
    in_dim=X.shape[1], n_rule=n_rule, high_dim=True, init_center=init_center
)
gmf = nn.Sequential(membership, nn.LayerNorm(n_rule), nn.ReLU())

# --------- Full TSK model ---------
# `precons` may be any deep model whose output becomes the TSK input
# (a natural extension point); it is left as None here.
model = TSK(in_dim=X.shape[1], out_dim=n_class, n_rule=n_rule,
            antecedent=gmf, order=order, precons=None)
# ----------------- Optimizer -----------------
# Antecedent parameters (Gaussian centers / sigmas) are exempt from weight
# decay; every other parameter receives the configured decay.
ante_param = [p for n, p in model.named_parameters()
              if "center" in n or "sigma" in n]
other_param = [p for n, p in model.named_parameters()
               if "center" not in n and "sigma" not in n]
optimizer = AdamW(
    [
        {'params': ante_param, "weight_decay": 0},
        {'params': other_param, "weight_decay": weight_decay},
    ],
    lr=lr,
)
# ----------------- Early stopping -----------------
# Carve 10% of the training data off as a validation set.
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1)

# Stop after `patience` epochs without a validation-accuracy improvement and
# save the best model seen so far to `save_path`.
early_stop = EarlyStoppingACC(x_val, y_val, verbose=1, patience=20, save_path="tmp.pkl")

# Training wrapper: bundles model, optimizer, loss, and callbacks.
wrapper = Wrapper(
    model,
    optimizer=optimizer,
    criterion=nn.CrossEntropyLoss(),
    epochs=300,
    callbacks=[early_stop],
)
# ----------------- Train & evaluate -----------------
wrapper.fit(x_train, y_train)       # run gradient-descent training
wrapper.load("tmp.pkl")             # restore the best checkpoint saved by early stopping
test_scores = wrapper.predict(x_test)    # per-class scores for the test set
y_pred = test_scores.argmax(axis=1)      # predicted class index per sample
print("[TSK] ACC: {:.4f}".format(accuracy_score(y_test, y_pred)))  # report test accuracy