这里的前置工作是数据分析,将特征值抽取出来
def _read_table(path_stem, file_type):
    """Load the feature table stored at ``<path_stem>.<file_type>``.

    Args:
        path_stem: File path without its extension.
        file_type: ``"csv"`` or ``"xlsx"``.

    Returns:
        The table as a ``pandas.DataFrame``.

    Raises:
        FileNotFoundError: If no file exists at the resulting path.
    """
    path = path_stem + "." + file_type
    if file_type == "csv":
        # CSV input is decoded as GBK, exactly as the original code did.
        return pd.read_csv(path, encoding="GBK")
    # NOTE(review): pandas needs an Excel engine (e.g. openpyxl) for this.
    return pd.read_excel(path)


def _seed_input(label):
    """Show a text box for a random seed and return it as a non-negative int.

    Text that is not an integer, or that is negative, falls back to 0 and a
    warning is shown instead of letting ``int()`` / scikit-learn raise.
    """
    raw = st.text_input(label, 0)
    try:
        seed = int(raw)
    except ValueError:
        seed = -1
    if seed < 0:
        st.warning("随机种子必须是非负整数,已使用默认值0")
        return 0
    return seed


def _radarplot(model_center, label):
    """Draw one closed radar line per cluster centre and render it.

    Args:
        model_center: 2-D array, one row per cluster centre and one column
            per feature (as returned by ``KMeans.cluster_centers_``).
        label: Feature names, one per column of ``model_center``.
    """
    import matplotlib.pyplot as plt

    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

    names = list(label)
    # One spoke per feature; the closed copy repeats the first angle so that
    # each plotted line returns to its starting point.
    spokes = np.linspace(0, 2 * np.pi, len(names), endpoint=False)
    closed = np.concatenate((spokes, [spokes[0]]))

    # A single figure holding a single polar axes.
    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(1, 1, 1, polar=True)
    ax.set_thetagrids(np.degrees(spokes), names)
    ax.set_ylim(model_center.min(), model_center.max())
    ax.grid(True)

    colors = ['r', 'pink', 'g', 'b', 'm', 'y', 'k', '0', 'c']
    markers = ['4', '8', 'x', '*', 'd', '_', '.', '+', '|']
    legend = []
    for i, center in enumerate(model_center):
        values = np.concatenate((center, [center[0]]))
        # Cycle through the styles; the marker is shifted by one on every
        # wrap-around so a reused colour still gets a different marker.
        color = colors[i % len(colors)]
        marker = markers[(i + i // len(colors)) % len(markers)]
        ax.plot(closed, values, c=color, marker=marker)
        legend.append(str(i) + "类")
    ax.tick_params(labelsize=15)
    ax.legend(legend, fontsize=15)

    st.pyplot(fig)
    # Streamlit reruns the script on every interaction; close the figure so
    # pyplot does not accumulate one open figure per rerun.
    plt.close(fig)


def ALG(ALG):
    """Render the "机器学习模型训练" (machine-learning training) page.

    The page is built top to bottom and stops at the first step the user has
    not enabled yet:

    1. load a table from the path typed in the sidebar and optionally
       preview it;
    2. pick feature columns and optionally standardize them;
    3. cluster the features with KMeans and optionally show a radar chart of
       the cluster centres;
    4. train a classifier (decision tree, KNN or Gaussian naive Bayes) on the
       cluster labels and report precision, recall, the confusion matrix and
       the classification report.

    Args:
        ALG: Name of the page selected by the caller. Nothing is rendered
            unless it equals ``"机器学习模型训练"``.

    Returns:
        None. All output goes to the Streamlit page.
    """
    if ALG != "机器学习模型训练":
        return
    st.write(ALG)

    # --- 1. load and preview the data -------------------------------------
    file_type = st.sidebar.selectbox("读取文件类型", ("csv", "xlsx"))
    path_stem = st.sidebar.text_input('输入地址', 'E:/桌面/项目实训/电商优惠券特征数据')
    try:
        table = _read_table(path_stem, file_type)
    except FileNotFoundError:
        st.error("找不到文件:" + path_stem + "." + file_type)
        return

    preview_rows = st.select_slider("滑动显示数量", options=[5, 10, 20, 50, 100])
    if st.selectbox("是否显示表", ("否", "是")) == "是":
        st.dataframe(table[0:preview_rows])
    else:
        st.write("不显示")

    # --- 2. feature selection and scaling ---------------------------------
    columns = st.multiselect("选择特征列", table.columns)
    if not columns:
        return
    chosen = table[columns]
    st.write(chosen.describe())

    from sklearn.preprocessing import StandardScaler, MinMaxScaler
    scaler_name = st.selectbox("数据标准化", ("标准差标准化StandardScaler",
                                         "最大最小标准化MinMaxScaler"))
    if scaler_name == "最大最小标准化MinMaxScaler":
        scaler = MinMaxScaler()
    else:
        scaler = StandardScaler()

    if st.selectbox("标准化", ("是", "否")) == "是":
        feature = scaler.fit_transform(chosen)
        st.write("均值:", feature.mean())
        st.write("标准差:", feature.std())
    else:
        # Without scaling the raw values are used, so the later steps still
        # have a feature matrix to work on.
        feature = chosen.to_numpy(dtype=float)

    # --- 3. clustering ----------------------------------------------------
    from sklearn.cluster import KMeans
    n_clusters = st.selectbox("选择聚类数量", list(range(2, 12)))
    cluster_seed = _seed_input("聚类随机种子默认0")
    if st.selectbox("选择聚类", ("否", "是")) != "是":
        return

    kmeans = KMeans(n_clusters=int(n_clusters), random_state=cluster_seed)
    kmeans.fit(feature)
    st.write("聚类中心", kmeans.cluster_centers_)
    shown_labels = st.select_slider("显示数量", options=[20, 50, 100])
    st.write("聚类标签", kmeans.labels_[0:shown_labels])
    st.write("聚类标签与数量", pd.Series(kmeans.labels_).value_counts())

    if st.selectbox("是否显示雷达图", ("是", "否")) == "是":
        _radarplot(kmeans.cluster_centers_, chosen.columns)

    # --- 4. classification on the cluster labels --------------------------
    test_ratio = st.selectbox("选择测试集比例",
                              [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5,
                               0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9])
    split_seed = _seed_input("分类随机种子默认0")
    if st.selectbox("选择分类", ("是", "否")) != "是":
        return

    from sklearn.model_selection import train_test_split
    train_feature, test_feature, y_train, y_test = train_test_split(
        feature, kmeans.labels_, test_size=test_ratio, random_state=split_seed)
    st.write("模型分类", train_feature.shape, test_feature.shape)

    method = st.selectbox("选择分类方法", ("决策树", "KNN", "朴素贝叶斯"))
    if method == "决策树":
        from sklearn.tree import DecisionTreeClassifier
        classifier = DecisionTreeClassifier()
        classifier.fit(train_feature, y_train)
    elif method == "KNN":
        knn_rows = st.select_slider("KNN数据数量",
                                    options=[100, 500, 1000, 2000, 5000, 10000])
        n_neighbors = st.select_slider("n_neighbors数量默认5",
                                       options=[2, 3, 5, 10, 15, 20])
        from sklearn.neighbors import KNeighborsClassifier
        classifier = KNeighborsClassifier(n_neighbors=n_neighbors)
        # Only the first knn_rows training samples are used for KNN.
        classifier.fit(train_feature[:knn_rows], y_train[:knn_rows])
    else:
        from sklearn.naive_bayes import GaussianNB
        classifier = GaussianNB()
        classifier.fit(train_feature, y_train)

    y_pre = classifier.predict(test_feature)
    shown_predictions = st.select_slider("控制显示数量", options=[20, 50, 100])
    st.write(y_pre[0:shown_predictions])

    from sklearn.metrics import (classification_report, confusion_matrix,
                                 precision_score, recall_score)
    st.write('模型的精度为:', precision_score(y_test, y_pre, average='macro'))
    st.write('模型的召回率为:', recall_score(y_test, y_pre, average='macro'))
    st.write('模型的混淆矩阵为:\n', confusion_matrix(y_test, y_pre))
    st.write('分类模型的性能评估报告:\n', classification_report(y_test, y_pre))
def _read_table(path_stem, file_type):
    """Load the feature table stored at ``<path_stem>.<file_type>``.

    Args:
        path_stem: File path without its extension.
        file_type: ``"csv"`` or ``"xlsx"``.

    Returns:
        The table as a ``pandas.DataFrame``.

    Raises:
        FileNotFoundError: If no file exists at the resulting path.
    """
    path = path_stem + "." + file_type
    if file_type == "csv":
        # CSV input is decoded as GBK, exactly as the original code did.
        return pd.read_csv(path, encoding="GBK")
    # NOTE(review): pandas needs an Excel engine (e.g. openpyxl) for this.
    return pd.read_excel(path)


def _seed_input(label):
    """Show a text box for a random seed and return it as a non-negative int.

    Text that is not an integer, or that is negative, falls back to 0 and a
    warning is shown instead of letting ``int()`` / scikit-learn raise.
    """
    raw = st.text_input(label, 0)
    try:
        seed = int(raw)
    except ValueError:
        seed = -1
    if seed < 0:
        st.warning("随机种子必须是非负整数,已使用默认值0")
        return 0
    return seed


def _radarplot(model_center, label):
    """Draw one closed radar line per cluster centre and render it.

    Args:
        model_center: 2-D array, one row per cluster centre and one column
            per feature (as returned by ``KMeans.cluster_centers_``).
        label: Feature names, one per column of ``model_center``.
    """
    import matplotlib.pyplot as plt

    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

    names = list(label)
    # One spoke per feature; the closed copy repeats the first angle so that
    # each plotted line returns to its starting point.
    spokes = np.linspace(0, 2 * np.pi, len(names), endpoint=False)
    closed = np.concatenate((spokes, [spokes[0]]))

    # A single figure holding a single polar axes.
    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(1, 1, 1, polar=True)
    ax.set_thetagrids(np.degrees(spokes), names)
    ax.set_ylim(model_center.min(), model_center.max())
    ax.grid(True)

    colors = ['r', 'pink', 'g', 'b', 'm', 'y', 'k', '0', 'c']
    markers = ['4', '8', 'x', '*', 'd', '_', '.', '+', '|']
    legend = []
    for i, center in enumerate(model_center):
        values = np.concatenate((center, [center[0]]))
        # Cycle through the styles; the marker is shifted by one on every
        # wrap-around so a reused colour still gets a different marker.
        color = colors[i % len(colors)]
        marker = markers[(i + i // len(colors)) % len(markers)]
        ax.plot(closed, values, c=color, marker=marker)
        legend.append(str(i) + "类")
    ax.tick_params(labelsize=15)
    ax.legend(legend, fontsize=15)

    st.pyplot(fig)
    # Streamlit reruns the script on every interaction; close the figure so
    # pyplot does not accumulate one open figure per rerun.
    plt.close(fig)


def ALG(ALG):
    """Render the "机器学习模型训练" (machine-learning training) page.

    The page is built top to bottom and stops at the first step the user has
    not enabled yet:

    1. load a table from the path typed in the sidebar and optionally
       preview it;
    2. pick feature columns and optionally standardize them;
    3. cluster the features with KMeans and optionally show a radar chart of
       the cluster centres;
    4. train a classifier (decision tree, KNN or Gaussian naive Bayes) on the
       cluster labels and report precision, recall, the confusion matrix and
       the classification report.

    Args:
        ALG: Name of the page selected by the caller. Nothing is rendered
            unless it equals ``"机器学习模型训练"``.

    Returns:
        None. All output goes to the Streamlit page.
    """
    if ALG != "机器学习模型训练":
        return
    st.write(ALG)

    # --- 1. load and preview the data -------------------------------------
    file_type = st.sidebar.selectbox("读取文件类型", ("csv", "xlsx"))
    path_stem = st.sidebar.text_input('输入地址', 'E:/桌面/项目实训/电商优惠券特征数据')
    try:
        table = _read_table(path_stem, file_type)
    except FileNotFoundError:
        st.error("找不到文件:" + path_stem + "." + file_type)
        return

    preview_rows = st.select_slider("滑动显示数量", options=[5, 10, 20, 50, 100])
    if st.selectbox("是否显示表", ("否", "是")) == "是":
        st.dataframe(table[0:preview_rows])
    else:
        st.write("不显示")

    # --- 2. feature selection and scaling ---------------------------------
    columns = st.multiselect("选择特征列", table.columns)
    if not columns:
        return
    chosen = table[columns]
    st.write(chosen.describe())

    from sklearn.preprocessing import StandardScaler, MinMaxScaler
    scaler_name = st.selectbox("数据标准化", ("标准差标准化StandardScaler",
                                         "最大最小标准化MinMaxScaler"))
    if scaler_name == "最大最小标准化MinMaxScaler":
        scaler = MinMaxScaler()
    else:
        scaler = StandardScaler()

    if st.selectbox("标准化", ("是", "否")) == "是":
        feature = scaler.fit_transform(chosen)
        st.write("均值:", feature.mean())
        st.write("标准差:", feature.std())
    else:
        # Without scaling the raw values are used, so the later steps still
        # have a feature matrix to work on.
        feature = chosen.to_numpy(dtype=float)

    # --- 3. clustering ----------------------------------------------------
    from sklearn.cluster import KMeans
    n_clusters = st.selectbox("选择聚类数量", list(range(2, 12)))
    cluster_seed = _seed_input("聚类随机种子默认0")
    if st.selectbox("选择聚类", ("否", "是")) != "是":
        return

    kmeans = KMeans(n_clusters=int(n_clusters), random_state=cluster_seed)
    kmeans.fit(feature)
    st.write("聚类中心", kmeans.cluster_centers_)
    shown_labels = st.select_slider("显示数量", options=[20, 50, 100])
    st.write("聚类标签", kmeans.labels_[0:shown_labels])
    st.write("聚类标签与数量", pd.Series(kmeans.labels_).value_counts())

    if st.selectbox("是否显示雷达图", ("是", "否")) == "是":
        _radarplot(kmeans.cluster_centers_, chosen.columns)

    # --- 4. classification on the cluster labels --------------------------
    test_ratio = st.selectbox("选择测试集比例",
                              [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5,
                               0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9])
    split_seed = _seed_input("分类随机种子默认0")
    if st.selectbox("选择分类", ("是", "否")) != "是":
        return

    from sklearn.model_selection import train_test_split
    train_feature, test_feature, y_train, y_test = train_test_split(
        feature, kmeans.labels_, test_size=test_ratio, random_state=split_seed)
    st.write("模型分类", train_feature.shape, test_feature.shape)

    method = st.selectbox("选择分类方法", ("决策树", "KNN", "朴素贝叶斯"))
    if method == "决策树":
        from sklearn.tree import DecisionTreeClassifier
        classifier = DecisionTreeClassifier()
        classifier.fit(train_feature, y_train)
    elif method == "KNN":
        knn_rows = st.select_slider("KNN数据数量",
                                    options=[100, 500, 1000, 2000, 5000, 10000])
        n_neighbors = st.select_slider("n_neighbors数量默认5",
                                       options=[2, 3, 5, 10, 15, 20])
        from sklearn.neighbors import KNeighborsClassifier
        classifier = KNeighborsClassifier(n_neighbors=n_neighbors)
        # Only the first knn_rows training samples are used for KNN.
        classifier.fit(train_feature[:knn_rows], y_train[:knn_rows])
    else:
        from sklearn.naive_bayes import GaussianNB
        classifier = GaussianNB()
        classifier.fit(train_feature, y_train)

    y_pre = classifier.predict(test_feature)
    shown_predictions = st.select_slider("控制显示数量", options=[20, 50, 100])
    st.write(y_pre[0:shown_predictions])

    from sklearn.metrics import (classification_report, confusion_matrix,
                                 precision_score, recall_score)
    st.write('模型的精度为:', precision_score(y_test, y_pre, average='macro'))
    st.write('模型的召回率为:', recall_score(y_test, y_pre, average='macro'))
    st.write('模型的混淆矩阵为:\n', confusion_matrix(y_test, y_pre))
    st.write('分类模型的性能评估报告:\n', classification_report(y_test, y_pre))
以往的文章中有 Streamlit 的通用模板,包括登录界面的实现以及框架解析。
在做的过程中,发现 Streamlit 可以部署在 Wi-Fi(局域网)上:同一局域网内的其他设备可以通过局域网地址打开该 Streamlit 应用,从而达到共享的作用。