Python神器Taipy:10分钟构建专业级数据应用的无代码方案
import taipy as tp
from taipy.gui import Gui, notify
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Build the demo dataset: 100 consecutive days starting 2023-01-01 with random
# integer sales and profit columns (unseeded, so values differ on every run).
data = pd.DataFrame({
"日期": pd.date_range("2023-01-01", periods=100),
"销售额": np.random.randint(1000, 5000, 100),
"利润": np.random.randint(200, 1000, 100)
})
# Initial values of the variables referenced by the page controls.
selected_metric = "销售额"
chart_type = "折线图"
# (earliest date, latest date) of the dataset
date_range = (data["日期"].min(), data["日期"].max())
# Date-window filter used by the chart callback
def filter_data(data, date_min, date_max):
    """Return the rows of ``data`` whose "日期" value lies in [date_min, date_max].

    Both bounds are inclusive. The input frame is not modified.
    """
    inside_window = data["日期"].between(date_min, date_max)
    return data[inside_window]
# Page layout (Taipy Markdown template): a settings column, a chart column and
# a paged table underneath.
# NOTE(review): the `figure` and `date_range` element names are assumed to be
# valid controls in the Taipy version in use — confirm.
page = """
<|navbar|>
# 销售数据分析仪表板
<|layout|columns=1 1|
<|
### 配置参数
**选择指标**: <|{selected_metric}|selector|lov=销售额;利润|>
**选择图表类型**: <|{chart_type}|selector|lov=折线图;柱状图;面积图|>
**日期范围**: <|{date_range}|date_range|>
<|构建图表|button|on_action=update_chart|>
|>
<|
### 数据可视化
<|{chart}|figure|height=400px|>
|>
|>
### 数据表格
<|{filtered_data}|table|page_size=5|>
"""
# Initial state: the table starts with the unfiltered dataset.
filtered_data = data
# NOTE(review): this module-level figure/axes pair is not referenced again in
# the visible code (plot_chart creates its own) — presumably leftover.
fig, ax = plt.subplots(figsize=(10, 4))
def plot_chart(df, metric, type):
    """Draw ``metric`` against the "日期" column and return the Matplotlib figure.

    Args:
        df: DataFrame holding a "日期" column and a column named ``metric``.
        metric: Column to plot; also used for the title and the y-axis label.
        type: "折线图" (line), "柱状图" (bar) or "面积图" (filled area). Any
            other value leaves the axes without a data series, as before.
            The name shadows the builtin but is kept so callers keep working.

    Returns:
        The figure that was drawn.
    """
    fig, ax = plt.subplots(figsize=(10, 4))
    dates = df["日期"]
    values = df[metric]
    if type == "折线图":
        ax.plot(dates, values, marker='o')
    elif type == "柱状图":
        ax.bar(dates, values)
    elif type == "面积图":
        ax.fill_between(dates, values)
    ax.set_title(f"{metric}趋势")
    ax.set_xlabel("日期")
    ax.set_ylabel(metric)
    ax.grid(True)
    # plt.subplots registers the figure with pyplot. Without closing it, every
    # chart refresh would leave one more open figure behind. Closing only drops
    # pyplot's reference; the returned Figure object stays usable.
    plt.close(fig)
    return fig
# Figure displayed before the first button click, built from the initial state.
chart = plot_chart(filtered_data, selected_metric, chart_type)
# Chart refresh callback
def update_chart(state):
    """Re-filter the dataset for the selected dates, redraw the chart and notify the user."""
    start, end = state.date_range[0], state.date_range[1]
    state.filtered_data = filter_data(data, start, end)
    metric = state.selected_metric
    kind = state.chart_type
    state.chart = plot_chart(state.filtered_data, metric, kind)
    notify(state, "info", f"图表已更新: {metric} / {kind}")
if __name__ == "__main__":
    # Serve the dashboard page when the file is run as a script.
    gui = Gui(page)
    gui.run(debug=True)
1. Taipy:为什么数据科学家需要它?
在数据驱动的世界中,将数据分析转化为可交互的应用程序已成为必备技能。然而,传统方法往往要求数据科学家精通前端技术或依赖工程团队,这不仅延缓了开发周期,还限制了迭代速度。
Taipy正是为解决这一痛点而生。它是一个强大的Python框架,允许数据科学家和分析师在几分钟内将数据分析脚本转变为专业级Web应用,无需前端知识。其核心优势包括:
- 低代码/无代码设计:使用简单的Markdown风格语法创建复杂UI
- 无缝Python集成:直接连接到现有Python分析代码
- 交互性与响应式:自动处理状态管理和数据更新
- 完整应用开发平台:从数据处理、任务调度到前端展示的全流程支持
- 企业级可扩展性:适用于从快速原型到生产环境的全过程
无论是创建简单的数据仪表板,还是构建复杂的数据应用,Taipy都能让你专注于数据和业务逻辑,而不是前端开发细节。
2. 安装与基础配置
安装Taipy非常简单:
pip install taipy
如果只需要GUI功能,可以安装更轻量的版本:
pip install taipy-gui
基本的Taipy应用结构包括:
from taipy.gui import Gui
# Variable displayed by the page
my_variable = "Hello Taipy!"
# Page layout: a heading and a text element that shows my_variable
page = """
# 我的第一个Taipy应用
这是一个简单的文本: <|{my_variable}|>
"""
# Create the GUI and start it
if __name__ == "__main__":
    gui = Gui(page)
    gui.run(debug=True)
执行上述代码后,Taipy会自动启动一个Web服务器(默认在http://localhost:5000),并显示你的应用界面。每当变量 my_variable 更改时,UI会自动更新。
3. 高级技巧一:创建交互式控件与数据绑定
Taipy提供丰富的UI控件,可以轻松创建交互式应用:
import taipy as tp
from taipy.gui import Gui
import pandas as pd
import numpy as np
# Sample data: 100 points of y = sin(x) for x in [0, 10]
data = pd.DataFrame({
"x": np.linspace(0, 10, 100),
"y": np.sin(np.linspace(0, 10, 100))
})
# Variables bound to the controls, with their initial values
text_input = "请输入文本"
slider_value = 50
checkbox_state = False
selected_option = "选项B"
options = ["选项A", "选项B", "选项C"]
multiple_selection = ["选项A"]
date_value = "2023-05-15"
file_path = None
# Page layout
# NOTE(review): the `checkbox` and `multiselect` element names are assumed to
# exist in the Taipy version in use — confirm.
page = """
# Taipy 控件演示
## 基础输入控件
- 文本输入: <|{text_input}|input|>
- 滑块(0-100): <|{slider_value}|slider|min=0|max=100|>
- 复选框: <|{checkbox_state}|checkbox|label=启用功能|>
- 单选下拉框: <|{selected_option}|selector|lov={options}|>
- 多选框: <|{multiple_selection}|multiselect|lov={options}|>
- 日期选择器: <|{date_value}|date|>
- 文件上传: <|{file_path}|file_selector|label=选择文件|>
## 数据展示
以下是数据表格:
<|{data}|table|page_size=5|>
## 数据可视化
<|{chart}|chart|x=x|y=y|mode=lines|height=400px|>
<|更新图表|button|on_action=update_chart|>
"""
# Chart definition: the data frame plus layout settings (title, axis titles).
# NOTE(review): the chart control is bound to this dict rather than to a data
# frame — confirm Taipy accepts a {"data", "layout"} mapping here.
chart = {
"data": data,
"layout": {
"title": "正弦波形图",
"xaxis": {"title": "X值"},
"yaxis": {"title": "Y值"}
}
}
# Callback
def update_chart(state):
    """Recompute the sine wave from the slider/checkbox values and publish it.

    The amplitude is ``slider_value / 50`` (0-100 maps to 0-2). When the
    checkbox is ticked, Gaussian noise (sigma 0.1) is added.

    The previous version edited ``state.data`` and ``state.chart`` in place.
    In-place edits do not reassign the bound variables, so the GUI is not told
    that anything changed. New objects are built and assigned instead, which
    also leaves the module-level frame untouched.
    """
    amplitude = state.slider_value / 50
    updated = state.data.copy()
    wave = amplitude * np.sin(updated["x"])
    if state.checkbox_state:
        wave = wave + np.random.normal(0, 0.1, len(wave))
    updated["y"] = wave
    state.data = updated

    # Rebuild the chart description around the new frame and title.
    new_chart = dict(state.chart)
    new_layout = dict(new_chart["layout"])
    new_layout["title"] = f"振幅为{amplitude:.2f}的正弦波"
    new_chart["data"] = updated
    new_chart["layout"] = new_layout
    state.chart = new_chart
# Run the application
if __name__ == "__main__":
    gui = Gui(page)
    gui.run(debug=True)
这个示例展示了多种交互控件,并通过回调函数实现了数据绑定。用户调整滑块或勾选复选框后,点击“更新图表”按钮即可刷新图表。
4. 高级技巧二:构建复杂布局与导航
Taipy支持创建复杂的布局和多页面应用:
import taipy as tp
from taipy.gui import Gui, navigate
import pandas as pd
import numpy as np
# Datasets: per-product sales figures and per-age-group user analytics
sales_data = pd.DataFrame({
"产品": ["A产品", "B产品", "C产品", "D产品"],
"销售额": [12500, 8700, 7300, 15200],
"成本": [5600, 4200, 3800, 7100],
"利润": [6900, 4500, 3500, 8100]
})
analytics_data = pd.DataFrame({
"年龄段": ["18-24", "25-34", "35-44", "45-54", "55+"],
"用户数": [1200, 2800, 2300, 1700, 900],
"转化率": [0.12, 0.18, 0.22, 0.15, 0.08]
})
# Initial page state
current_page = "dashboard"
active_tab = "销售"
# Page templates: a tabbed dashboard page and a product details page.
# NOTE(review): the `tabs` and `container` elements and the `<|{condition}|`
# conditional blocks are assumed to be supported by the Taipy version in use —
# confirm.
dashboard_page = """
<|navbar|>
<|container|
# 仪表板页面
<|tabs|lov=销售;分析;设置|active={active_tab}|on_change=change_tab|>
<|{active_tab=="销售"}|
## 销售概况
<|layout|columns=1 1|
<|
### 关键指标
- 总销售额: <|{sales_data["销售额"].sum()}|text|format=$,.0f|>
- 总利润: <|{sales_data["利润"].sum()}|text|format=$,.0f|>
- 利润率: <|{sales_data["利润"].sum() / sales_data["销售额"].sum() * 100}|text|format=.1f|>%
|>
<|
### 销售分布
<|{sales_chart}|chart|type=pie|labels=产品|values=销售额|>
|>
|>
### 详细数据
<|{sales_data}|table|>
|>
<|{active_tab=="分析"}|
## 用户分析
<|layout|columns=1 1|
<|
### 用户年龄分布
<|{analytics_data}|chart|type=bar|x=年龄段|y=用户数|>
|>
<|
### 转化率分析
<|{analytics_data}|chart|type=scatter|x=年龄段|y=转化率|mode=lines+markers|>
|>
|>
|>
<|{active_tab=="设置"}|
## 设置
### 图表配置
- 显示标签: <|{show_labels}|checkbox|>
- 图表类型: <|{chart_type}|selector|lov=柱状图;折线图;散点图|>
- 颜色主题: <|{color_theme}|selector|lov=默认;蓝色;绿色;彩虹|>
<|应用设置|button|on_action=apply_settings|>
|>
|>
"""
details_page = """
<|navbar|>
# 详细信息页面
## 产品详情
<|返回仪表板|button|on_action=back_to_dashboard|>
<|layout|columns=2 1|
<|
### 产品销售对比
<|{sales_data}|chart|type=bar|x=产品|y[1]=销售额|y[2]=成本|y[3]=利润|>
|>
<|
### 产品分析
选择产品: <|{selected_product}|selector|lov={sales_data["产品"].tolist()}|>
**选中产品数据:**
- 销售额: <|{get_product_data(selected_product)["销售额"]}|text|format=$,.0f|>
- 成本: <|{get_product_data(selected_product)["成本"]}|text|format=$,.0f|>
- 利润: <|{get_product_data(selected_product)["利润"]}|text|format=$,.0f|>
- 利润率: <|{get_product_data(selected_product)["利润"] / get_product_data(selected_product)["销售额"] * 100}|text|format=.1f|>%
|>
|>
"""
# Page registry: page name -> template
pages = {
"dashboard": dashboard_page,
"details": details_page
}
# Initial values of the settings-tab controls and the product selector
show_labels = True
chart_type = "柱状图"
color_theme = "默认"
selected_product = "A产品"
# Chart definition for the sales pie chart
sales_chart = {
"data": sales_data,
"layout": {"title": "各产品销售额占比"}
}
# Callbacks
def change_tab(state, value):
    """Store ``value`` as the active tab on ``state``.

    NOTE(review): this is wired as ``on_change``. Confirm which arguments
    Taipy passes to a two-parameter callback; ``value`` may not be the tab
    label.
    """
    setattr(state, "active_tab", value)
def apply_settings(state):
    """Translate the selected chart-type label and store it on ``state.analytics_data``.

    "柱状图" maps to "bar", "折线图" to "line", anything else to "scatter".

    NOTE(review): the result is written to a ``type`` attribute of
    ``state.analytics_data``. Whether any chart reads that attribute cannot be
    told from this file — confirm it has the intended effect.
    """
    plot_type_by_label = {"柱状图": "bar", "折线图": "line"}
    chart_type = plot_type_by_label.get(state.chart_type, "scatter")
    # Record the resolved plot type on the analytics data object
    state.analytics_data.type = chart_type
    # Colour-theme lookup table. It is built but not consumed yet; in a real
    # application the chart colour scheme would be updated from it here.
    color_map = {
        "默认": None,
        "蓝色": "blues",
        "绿色": "greens",
        "彩虹": "rainbow",
    }
def back_to_dashboard(state):
    """Button callback: navigate this client back to the "dashboard" page."""
    target_page = "dashboard"
    navigate(state, target_page)
def get_product_data(product):
    """Return the first ``sales_data`` row whose "产品" equals ``product``.

    Raises:
        IndexError: If no row matches.
    """
    is_match = sales_data["产品"] == product
    matching_rows = sales_data[is_match]
    return matching_rows.iloc[0]
def on_navigate(state, page, payload):
    """Navigation hook: remember the requested page and let navigation proceed.

    Args:
        state: The client state.
        page: Name of the page being navigated to.
        payload: Extra navigation parameters (unused).

    Returns:
        ``page`` unchanged. Taipy uses the value returned by this callback as
        the page to display, so the previous implicit ``None`` was not a valid
        target.
    """
    state.current_page = page
    return page
# Build and run the multi-page application
if __name__ == "__main__":
    gui = Gui(pages=pages)
    gui.run(debug=True)
这个示例展示了如何创建带有选项卡、多页面导航和复杂布局的应用。通过 <|layout|columns=1 1|> 可以创建网格布局,而 <|tabs|> 支持选项卡式导航。
5. 高级技巧三:数据分析与可视化集成
Taipy可以与Pandas、NumPy、Matplotlib和Plotly等数据分析工具无缝集成:
import taipy as tp
from taipy.gui import Gui
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
# Sample dataset loader
def load_data():
    """Load the iris dataset as a DataFrame with a categorical ``species`` column."""
    from sklearn.datasets import load_iris

    bunch = load_iris()
    frame = pd.DataFrame(data=bunch.data, columns=bunch.feature_names)
    # Map the integer targets to their species names
    frame['species'] = pd.Categorical.from_codes(bunch.target, bunch.target_names)
    return frame
# Load the data and set the initial values of the analysis controls
data = load_data()
# The first two columns are the default X and Y axes
selected_feature_x = data.columns[0]
selected_feature_y = data.columns[1]
cluster_count = 3
apply_pca = False
apply_scaling = True
chart_engine = "Plotly"
# Analysis helpers
def run_clustering(data, n_clusters=3, apply_scaling=True, apply_pca=False):
    """Cluster the numeric columns of ``data`` with K-means.

    Args:
        data: Input frame; only its numeric columns are used as features.
        n_clusters: Number of K-means clusters.
        apply_scaling: Standardise the features first when true.
        apply_pca: Project the features onto two principal components
            ("PC1", "PC2") before clustering when true.

    Returns:
        A new frame holding the (possibly transformed) features, a ``cluster``
        label column and, if present in ``data``, the ``species`` column.
    """
    numeric_cols = data.select_dtypes(include=[np.number]).columns
    matrix = data[numeric_cols].values

    if apply_scaling:
        matrix = StandardScaler().fit_transform(matrix)

    if apply_pca:
        matrix = PCA(n_components=2).fit_transform(matrix)
        out_columns = ['PC1', 'PC2']
    else:
        out_columns = numeric_cols
    result = pd.DataFrame(matrix, columns=out_columns)

    # Fixed random_state keeps the labelling reproducible
    model = KMeans(n_clusters=n_clusters, random_state=42)
    result['cluster'] = model.fit_predict(matrix)

    # Carry the target column over when the input has one
    if 'species' in data.columns:
        result['species'] = data['species'].values
    return result
# Chart builders
def create_matplotlib_chart(data, x_col, y_col):
    """Return a Matplotlib scatter plot of ``y_col`` against ``x_col``.

    Points are drawn per species (with a legend) when ``data`` has a
    ``species`` column, otherwise as a single series.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    if 'species' not in data.columns:
        ax.scatter(data[x_col], data[y_col], alpha=0.7)
    else:
        # One scatter series per species, in order of first appearance
        for name in data['species'].unique():
            rows = data[data['species'] == name]
            ax.scatter(rows[x_col], rows[y_col], label=name, alpha=0.7)
        ax.legend()

    ax.set_xlabel(x_col)
    ax.set_ylabel(y_col)
    ax.set_title(f"{x_col} vs {y_col}")
    ax.grid(True, linestyle='--', alpha=0.7)
    return fig
def create_plotly_chart(data, x_col, y_col):
    """Return an interactive Plotly scatter plot of ``y_col`` against ``x_col``.

    Colouring priority:
      1. ``cluster`` column present: colour by cluster (viridis scale). The
         species, if available, is shown on hover.
      2. ``species`` column present: colour by species.
      3. Otherwise a plain scatter plot.

    The cluster check comes first because clustering output carries the
    ``species`` column as well. With the previous order the species branch
    always won and cluster assignments were never shown.
    """
    columns = data.columns
    axis_labels = {x_col: x_col, y_col: y_col}
    if 'cluster' in columns:
        fig = px.scatter(
            data, x=x_col, y=y_col, color='cluster',
            title=f"{x_col} vs {y_col} (聚类结果)",
            labels=axis_labels,
            color_continuous_scale='viridis',
            hover_data=['species'] if 'species' in columns else None,
        )
    elif 'species' in columns:
        fig = px.scatter(
            data, x=x_col, y=y_col, color='species',
            title=f"{x_col} vs {y_col}",
            labels=axis_labels,
            hover_data=['species'],
        )
    else:
        fig = px.scatter(
            data, x=x_col, y=y_col,
            title=f"{x_col} vs {y_col}",
        )
    fig.update_layout(
        template='plotly_white',
        legend_title_text='分类',
        height=500,
    )
    return fig
# Chart update callback
def update_chart(state):
    """Re-run the analysis with the current settings and refresh chart and table.

    With clustering enabled the data goes through ``run_clustering``. With PCA
    also enabled the plottable features become "PC1"/"PC2" and the axes are
    forced to them. Otherwise the numeric columns of the raw data are the
    features.

    The previous version restored ``state.features`` only inside the
    clustering branch and never reset the axis selection. After one PCA run, a
    later update without PCA (or without clustering) still pointed at
    "PC1"/"PC2", which are not columns of the data being plotted. The feature
    list is now always recomputed and a stale selection falls back to an
    available feature.
    """
    use_pca = bool(state.apply_clustering and state.apply_pca)

    if state.apply_clustering:
        processed_data = run_clustering(
            state.data,
            n_clusters=state.cluster_count,
            apply_scaling=state.apply_scaling,
            apply_pca=state.apply_pca,
        )
    else:
        processed_data = state.data

    if use_pca:
        features = ['PC1', 'PC2']
    else:
        features = state.data.select_dtypes(include=[np.number]).columns.tolist()
    state.features = features

    if use_pca:
        state.selected_feature_x = 'PC1'
        state.selected_feature_y = 'PC2'
    elif features:
        # Replace selections that no longer exist in the plotted data
        if state.selected_feature_x not in features:
            state.selected_feature_x = features[0]
        if state.selected_feature_y not in features:
            state.selected_feature_y = features[1] if len(features) > 1 else features[0]

    # Build the chart with the selected engine (Plotly unless "Matplotlib")
    if state.chart_engine == "Matplotlib":
        build_chart = create_matplotlib_chart
    else:
        build_chart = create_plotly_chart
    state.chart = build_chart(
        processed_data,
        state.selected_feature_x,
        state.selected_feature_y,
    )

    # Refresh the data table
    state.display_data = processed_data
# Initial state: selectable features are the numeric columns of the dataset
features = data.select_dtypes(include=[np.number]).columns.tolist()
apply_clustering = False
display_data = data
# Initial chart
chart = create_plotly_chart(data, selected_feature_x, selected_feature_y)
# Page layout: parameter column on the left, chart and table on the right.
# NOTE(review): `chart` holds a figure object but is bound as the chart
# control's default property; confirm whether the Taipy version in use
# requires the `figure=` property for Plotly figures. The `checkbox` element
# and the `<|{apply_clustering}|` conditional block should be confirmed too.
page = """
# 数据分析与可视化工作台
<|layout|columns=1 2|
<|
## 分析参数
### 数据选择
- X轴特征: <|{selected_feature_x}|selector|lov={features}|>
- Y轴特征: <|{selected_feature_y}|selector|lov={features}|>
### 高级分析
<|{apply_clustering}|checkbox|label=应用聚类分析|>
<|layout|gap=10px|class_name=card mt-2 p-3 border-light|
#### 聚类参数
<|{apply_clustering}|
- 聚类数量: <|{cluster_count}|slider|min=2|max=10|step=1|>
- <|{apply_scaling}|checkbox|label=标准化数据|>
- <|{apply_pca}|checkbox|label=应用PCA降维|>
|>
### 可视化设置
- 图表引擎: <|{chart_engine}|selector|lov=Plotly;Matplotlib|>
<|更新图表|button|on_action=update_chart|>
|>
<|
## 数据可视化
<|{chart}|chart|height=500px|>
### 数据表
<|{display_data}|table|height=300px|page_size=10|>
|>
|>
"""
# Run the application
if __name__ == "__main__":
    gui = Gui(page)
    gui.run(debug=True)
这个示例展示了如何创建一个功能完整的数据分析工作台,集成了常用的数据分析和可视化库。用户可以交互式地选择特征、应用聚类分析,并查看结果。
6. 高级技巧四:数据处理工作流与调度任务
Taipy不仅提供GUI功能,还包括强大的任务管理和数据工作流系统:
import taipy as tp
from taipy.gui import Gui, notify
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import time
import os
# Data node configurations, one per artefact exchanged between the tasks
data_node_cfg = {
"raw_data": tp.Config.configure_data_node(id="raw_data"),
"processed_data": tp.Config.configure_data_node(id="processed_data"),
"model": tp.Config.configure_data_node(id="model"),
"predictions": tp.Config.configure_data_node(id="predictions"),
"performance": tp.Config.configure_data_node(id="performance")
}
# Task functions
def load_data(job_id=None, delay=2):
    """Simulate a data-loading task and return a synthetic daily series.

    Args:
        job_id: Identifier echoed in the log output only.
        delay: Seconds to sleep to imitate a slow load. This used to be a
            hard-coded 2 seconds; pass 0 to skip the wait.

    Returns:
        A 100-row DataFrame with ``date`` (daily from 2023-01-01), ``value``
        (noise around 100 plus a sine component) and ``category`` (random
        choice of "A"/"B"/"C").
    """
    print(f"开始加载数据... (Job ID: {job_id})")
    if delay:
        time.sleep(delay)  # simulated latency
    dates = pd.date_range(start='2023-01-01', periods=100, freq='D')
    values = np.random.normal(100, 15, size=100) + np.sin(np.linspace(0, 6, 100)) * 20
    data = pd.DataFrame({
        'date': dates,
        'value': values,
        'category': np.random.choice(['A', 'B', 'C'], size=100),
    })
    print(f"数据加载完成,共 {len(data)} 条记录")
    return data
def process_data(raw_data, job_id=None, delay=1.5):
    """Derive calendar, lag and rolling-mean features from the raw series.

    Args:
        raw_data: DataFrame with a datetime ``date`` column and a numeric
            ``value`` column. It is copied, not modified.
        job_id: Identifier echoed in the log output only.
        delay: Seconds to sleep to imitate a slow step. This used to be a
            hard-coded 1.5 seconds; pass 0 to skip the wait.

    Returns:
        A copy of ``raw_data`` with ``day_of_week``, ``month``, ``lag_1``
        (previous value, 0 for the first row) and ``rolling_mean`` (7-row
        window, at least 1 observation) added. Rows containing NaN are
        dropped.
    """
    print(f"开始处理数据... (Job ID: {job_id})")
    if delay:
        time.sleep(delay)  # simulated latency
    processed = raw_data.copy()
    processed['day_of_week'] = processed['date'].dt.dayofweek
    processed['month'] = processed['date'].dt.month
    processed['lag_1'] = processed['value'].shift(1).fillna(0)
    processed['rolling_mean'] = processed['value'].rolling(window=7, min_periods=1).mean()
    processed = processed.dropna()
    print(f"数据处理完成,处理后 {len(processed)} 条记录")
    return processed
def train_model(processed_data, job_id=None, delay=3):
    """Simulate model training and return a dict describing a random linear model.

    Args:
        processed_data: Accepted for pipeline wiring; not read by this
            placeholder implementation.
        job_id: Identifier echoed in the log output only.
        delay: Seconds to sleep to imitate training time. This used to be a
            hard-coded 3 seconds; pass 0 to skip the wait.

    Returns:
        A dict with ``coefficients`` (4 random values), ``intercept``,
        ``trained_at`` (timestamp string) and ``features`` (the feature
        column names the coefficients apply to).
    """
    print(f"开始训练模型... (Job ID: {job_id})")
    if delay:
        time.sleep(delay)  # simulated latency
    # Placeholder for a real fit (e.g. with scikit-learn)
    model = {
        'coefficients': np.random.rand(4),
        'intercept': np.random.rand(),
        'trained_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'features': ['day_of_week', 'month', 'lag_1', 'rolling_mean'],
    }
    print("模型训练完成")
    return model
def make_predictions(processed_data, model, job_id=None, delay=1):
    """Apply the linear ``model`` to ``processed_data`` and return the predictions.

    Args:
        processed_data: DataFrame containing every column named in
            ``model['features']``. It is copied, not modified.
        model: Dict with ``features``, ``coefficients`` and ``intercept``.
        job_id: Identifier echoed in the log output only.
        delay: Seconds to sleep to imitate inference time. This used to be a
            hard-coded 1 second; pass 0 to skip the wait.

    Returns:
        A copy of ``processed_data`` with a ``predicted`` column holding
        ``features . coefficients + intercept``.
    """
    print(f"开始生成预测... (Job ID: {job_id})")
    if delay:
        time.sleep(delay)  # simulated latency
    predictions = processed_data.copy()
    X = processed_data[model['features']]
    predictions['predicted'] = np.dot(X, model['coefficients']) + model['intercept']
    print("预测完成")
    return predictions
def evaluate_performance(predictions, job_id=None, delay=1):
    """Compute MSE, MAE and R² of ``predicted`` against ``value``.

    Args:
        predictions: DataFrame with ``value`` (actual) and ``predicted``
            columns.
        job_id: Identifier echoed in the log output only.
        delay: Seconds to sleep to imitate evaluation time. This used to be a
            hard-coded 1 second; pass 0 to skip the wait.

    Returns:
        A dict with float ``mse``, ``mae``, ``r2`` and an ``evaluated_at``
        timestamp string.
    """
    print(f"开始评估性能... (Job ID: {job_id})")
    if delay:
        time.sleep(delay)  # simulated latency
    actual = predictions['value']
    predicted = predictions['predicted']
    residual = actual - predicted
    mse = float(np.mean(residual ** 2))
    mae = float(np.mean(np.abs(residual)))
    ss_res = float(np.sum(residual ** 2))
    ss_tot = float(np.sum((actual - np.mean(actual)) ** 2))
    if ss_tot == 0:
        # A constant actual series makes R² undefined (division by zero).
        # Report 1.0 for a perfect fit and 0.0 otherwise, not inf/nan.
        r2 = 1.0 if ss_res == 0 else 0.0
    else:
        r2 = 1 - ss_res / ss_tot
    performance = {
        'mse': mse,
        'mae': mae,
        'r2': r2,
        'evaluated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    }
    print(f"评估完成,MSE: {mse:.2f}, MAE: {mae:.2f}, R²: {r2:.2f}")
    return performance
# Task configurations: each task names its function and the data nodes it
# reads (inputs) and writes (outputs); the shared nodes chain the tasks.
task_cfg = {
"load_data": tp.Config.configure_task(
id="load_data",
function=load_data,
outputs=[data_node_cfg["raw_data"]]
),
"process_data": tp.Config.configure_task(
id="process_data",
function=process_data,
inputs=[data_node_cfg["raw_data"]],
outputs=[data_node_cfg["processed_data"]]
),
"train_model": tp.Config.configure_task(
id="train_model",
function=train_model,
inputs=[data_node_cfg["processed_data"]],
outputs=[data_node_cfg["model"]]
),
"make_predictions": tp.Config.configure_task(
id="make_predictions",
function=make_predictions,
inputs=[data_node_cfg["processed_data"], data_node_cfg["model"]],
outputs=[data_node_cfg["predictions"]]
),
"evaluate_performance": tp.Config.configure_task(
id="evaluate_performance",
function=evaluate_performance,
inputs=[data_node_cfg["predictions"]],
outputs=[data_node_cfg["performance"]]
)
}
# Pipeline grouping the five tasks.
# NOTE(review): `configure_pipeline` / `pipeline_configs` are assumed to exist
# in the installed Taipy version — confirm, the configuration API may differ
# between releases.
pipeline_cfg = tp.Config.configure_pipeline(
id="data_pipeline",
tasks=[
task_cfg["load_data"],
task_cfg["process_data"],
task_cfg["train_model"],
task_cfg["make_predictions"],
task_cfg["evaluate_performance"]
]
)
# Scenario wrapping the pipeline
scenario_cfg = tp.Config.configure_scenario(
id="data_analysis",
pipeline_configs=[pipeline_cfg]
)
# Configure and start Taipy Core.
# NOTE(review): `config` is not used again in the visible code; confirm that
# `tp.Config.configure()` is a valid call.
config = tp.Config.configure()
tp.Core().run()
# Scenario creation / submission callbacks
def create_and_run_scenario(state):
    """Create a scenario from ``scenario_cfg`` and publish it on ``state``.

    Despite the name, the scenario is only created here; submission happens
    in ``submit_scenario``.
    """
    new_scenario = tp.create_scenario(scenario_cfg)
    new_id = new_scenario.id
    state.current_scenario = new_scenario
    state.scenario_status = "已创建"
    state.scenario_id = new_id
    state.task_statuses = get_task_statuses(new_scenario)
    notify(state, "success", f"成功创建场景 ID: {new_id}")
def submit_scenario(state):
    """Submit the current scenario for execution, or report that none exists.

    The guard uses ``getattr(..., None)`` because ``hasattr`` is true when
    ``current_scenario`` exists but still holds ``None``. The previous check
    would then have submitted ``None``.
    """
    scenario = getattr(state, 'current_scenario', None)
    if scenario is None:
        notify(state, "error", "没有活动场景,请先创建")
        return
    tp.submit(scenario)
    state.scenario_status = "运行中"
    notify(state, "info", f"场景 {scenario.id} 已提交")
    # Refresh the task status table right after submission
    state.task_statuses = get_task_statuses(scenario)
def get_task_statuses(scenario):
    """Return ``{task name: {status, submission_date, execution_date}}`` for the pipeline tasks.

    Dates are formatted as ``%Y-%m-%d %H:%M:%S`` strings; a falsy date becomes
    ``None``.
    """
    task_names = (
        "load_data",
        "process_data",
        "train_model",
        "make_predictions",
        "evaluate_performance",
    )

    def describe(task):
        submitted = task.submission_date
        executed = task.execution_date
        return {
            "status": task.status.name,
            "submission_date": submitted.strftime('%Y-%m-%d %H:%M:%S') if submitted else None,
            "execution_date": executed.strftime('%Y-%m-%d %H:%M:%S') if executed else None,
        }

    return {
        name: describe(scenario.pipelines["data_pipeline"].tasks[name])
        for name in task_names
    }
def update_status(state):
    """Refresh scenario and task status; load the results once it has finished.

    Does nothing when no scenario has been created. The guard uses
    ``getattr(..., None)`` because ``hasattr`` is true when
    ``current_scenario`` exists but still holds ``None``.

    NOTE(review): ``scenario.is_finished()`` is assumed to exist on the
    scenario object — confirm against the installed Taipy version.
    """
    scenario = getattr(state, 'current_scenario', None)
    if scenario is None:
        return
    finished = scenario.is_finished()
    state.scenario_status = "完成" if finished else "运行中"
    state.task_statuses = get_task_statuses(scenario)
    if not finished:
        return
    # The scenario is done: pull every output data node into the GUI state
    state.raw_data = scenario.raw_data.read()
    state.processed_data = scenario.processed_data.read()
    state.predictions = scenario.predictions.read()
    state.performance = scenario.performance.read()
    state.results_chart = create_results_chart(state.predictions)
    notify(state, "success", "场景执行完成,已加载结果")
def create_results_chart(predictions_df):
    """Return a Plotly line chart comparing actual ``value`` with ``predicted`` over ``date``."""
    import plotly.graph_objects as go

    dates = predictions_df['date']
    actual_trace = go.Scatter(
        x=dates,
        y=predictions_df['value'],
        mode='lines',
        name='实际值',
        line=dict(color='blue'),
    )
    predicted_trace = go.Scatter(
        x=dates,
        y=predictions_df['predicted'],
        mode='lines',
        name='预测值',
        line=dict(color='red', dash='dash'),
    )
    fig = go.Figure()
    for trace in (actual_trace, predicted_trace):
        fig.add_trace(trace)
    fig.update_layout(
        title='实际值 vs 预测值',
        xaxis_title='日期',
        yaxis_title='值',
        template='plotly_white',
        height=500,
    )
    return fig
# Initial state variables.
# NOTE(review): the callbacks also assign `state.current_scenario`, which has
# no module-level initial value here — confirm Taipy allows setting a state
# variable that was never declared.
scenario_status = "未创建"
scenario_id = None
task_statuses = {}
raw_data = None
processed_data = None
predictions = None
performance = None
results_chart = None
selected_tab = "工作流"
# UI page: workflow control, results and performance tabs.
# NOTE(review): the `tabs` and `expandable` usage and the `<|{condition}|`
# conditional blocks are assumed to be valid in the Taipy version in use —
# confirm. `name` / `status` in the task table row are not defined in the
# visible code.
page = """
<|navbar|>
# Taipy 数据工作流演示
<|tabs|lov=工作流;结果;性能|active={selected_tab}|>
<|{selected_tab=="工作流"}|
## 数据处理工作流
<|layout|columns=1 2|
<|
### 工作流控制
场景状态: <|{scenario_status}|text|class_name={"badge " + ("bg-success" if scenario_status == "完成" else "bg-warning" if scenario_status == "运行中" else "bg-secondary")}|>
<|创建场景|button|on_action=create_and_run_scenario|>
<|提交执行|button|on_action=submit_scenario|>
<|刷新状态|button|on_action=update_status|>
|>
<|
### 任务状态
<|{len(task_statuses) > 0}|
| 任务名称 | 状态 | 提交时间 | 执行时间 |
|---------|------|---------|---------|
<|{sorted(task_statuses.items())}|expandable|expanded=True|
| <|{name}|> | <|{status["status"]}|text|class_name={"badge " + ("bg-success" if status["status"] == "COMPLETED" else "bg-warning" if status["status"] == "RUNNING" else "bg-secondary")}|> | <|{status["submission_date"]}|> | <|{status["execution_date"]}|> |
|>
|>
|>
|>
|>
<|{selected_tab=="结果"}|
## 分析结果
<|{predictions is not None}|
<|{results_chart}|chart|height=500px|>
### 预测数据表
<|{predictions}|table|page_size=10|>
|>
<|{predictions is None}|
请先执行工作流以生成结果。
|>
|>
<|{selected_tab=="性能"}|
## 模型性能指标
<|{performance is not None}|
<|layout|columns=1 1 1|
<|
### 均方误差 (MSE)
<|{performance["mse"]}|text|format=,.2f|>
|>
<|
### 平均绝对误差 (MAE)
<|{performance["mae"]}|text|format=,.2f|>
|>
<|
### 决定系数 (R²)
<|{performance["r2"]}|text|format=,.2f|>
|>
|>
评估时间: <|{performance["evaluated_at"]}|>
|>
<|{performance is None}|
请先执行工作流以生成性能指标。
|>
|>
"""
# Run the GUI
if __name__ == "__main__":
    gui = Gui(page)
    gui.run(debug=True)
这个高级示例展示了如何使用Taipy创建完整的数据处理工作流,并通过GUI监控和控制工作流执行。Taipy Core提供了任务依赖管理、数据节点和场景概念,使复杂的数据管道变得易于管理。
7. 高级技巧五:实时数据处理与监控仪表板
Taipy可以处理实时数据并创建动态更新的监控仪表板:
import taipy as tp
from taipy.gui import Gui, notify
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import threading
import time
import datetime
import random
import os
# Simulated real-time metrics source
class DataSimulator:
    """Generate random-walk system metrics on a background thread.

    One sample (timestamp, CPU %, memory %, network Mbps, response time ms,
    error count) is produced every ``update_interval`` seconds while running.
    Samples above the configured thresholds also produce alert records.

    The generator thread writes the series while other threads read them
    through ``get_data_frame`` / ``get_alerts_frame``. All access to the
    series and the alert list is serialised with a re-entrant lock, so a
    reader never sees the per-metric lists at different lengths. ``stop()``
    wakes the generator through an event and does not wait out the interval.
    """

    def __init__(self, update_interval=1):
        """Create an idle simulator.

        Args:
            update_interval: Seconds between two generated samples.
        """
        self.update_interval = update_interval
        self.running = False
        self.thread = None
        # Re-entrant: _record_sample holds it while calling helpers that lock too.
        self._lock = threading.RLock()
        # Set by stop() to interrupt the wait between two samples.
        self._stop_event = threading.Event()
        # Metric series; index i of every list belongs to timestamps[i].
        self.cpu_usage = []
        self.memory_usage = []
        self.network_traffic = []
        self.response_times = []
        self.error_counts = []
        self.timestamps = []
        # Alert thresholds
        self.cpu_threshold = 80
        self.memory_threshold = 75
        self.response_time_threshold = 500
        # Alert log and retention limits
        self.alerts = []
        self.max_alerts = 100
        self.max_data_points = 300  # maximum number of samples kept

    def start(self):
        """Start the generator thread; return a status message."""
        if self.running:
            return "数据生成已在运行中"
        self.running = True
        self._stop_event.clear()
        self.thread = threading.Thread(target=self._generate_data, daemon=True)
        self.thread.start()
        return "数据生成已启动"

    def stop(self):
        """Ask the generator thread to stop and wait up to 2 s for it."""
        self.running = False
        self._stop_event.set()
        if self.thread:
            self.thread.join(timeout=2)
            self.thread = None
        return "数据生成已停止"

    def _generate_data(self):
        """Thread body: record one sample per interval until stopped."""
        while self.running:
            self._record_sample(datetime.datetime.now())
            # wait() returns True as soon as stop() sets the event
            if self._stop_event.wait(self.update_interval):
                break

    def _record_sample(self, now):
        """Generate one sample for ``now`` and append it atomically."""
        with self._lock:
            # CPU usage (0-100 %)
            cpu = min(100, max(0, self._get_next_value(
                self.cpu_usage[-1] if self.cpu_usage else 50,
                drift=0.5, volatility=3, min_val=10, max_val=95
            )))
            # Memory usage (0-100 %)
            memory = min(100, max(0, self._get_next_value(
                self.memory_usage[-1] if self.memory_usage else 40,
                drift=0.2, volatility=2, min_val=20, max_val=90
            )))
            # Network traffic (Mbps)
            network = max(0, self._get_next_value(
                self.network_traffic[-1] if self.network_traffic else 100,
                drift=0, volatility=20, min_val=0, max_val=500
            ))
            # Response time (ms)
            response_time = max(10, self._get_next_value(
                self.response_times[-1] if self.response_times else 100,
                drift=0, volatility=50, min_val=10, max_val=1000
            ))
            # The error probability grows with response time above 200 ms
            error_prob = 0.01 + 0.09 * (max(0, response_time - 200) / 800)
            errors = random.randint(0, 3) if random.random() < error_prob else 0

            self.timestamps.append(now)
            self.cpu_usage.append(cpu)
            self.memory_usage.append(memory)
            self.network_traffic.append(network)
            self.response_times.append(response_time)
            self.error_counts.append(errors)

            self._check_alerts(cpu, memory, response_time, now)
            self._trim_data()

    def _get_next_value(self, current, drift=0, volatility=1, min_val=0, max_val=100):
        """Return the next random-walk value, clamped to [min_val, max_val]."""
        change = drift + volatility * (random.random() - 0.5)
        return min(max_val, max(min_val, current + change))

    def _check_alerts(self, cpu, memory, response_time, timestamp):
        """Append an alert for every metric above its threshold.

        An alert is 'warning' below the critical level (90 for CPU and
        memory, 800 for response time) and 'critical' otherwise. Only the
        newest ``max_alerts`` alerts are kept.
        """
        checks = (
            (cpu, self.cpu_threshold, 90, f'CPU使用率过高:{cpu:.1f}%', 'CPU'),
            (memory, self.memory_threshold, 90, f'内存使用率过高:{memory:.1f}%', 'Memory'),
            (response_time, self.response_time_threshold, 800,
             f'响应时间过长:{response_time:.1f}ms', 'Response Time'),
        )
        with self._lock:
            for value, threshold, critical_from, message, metric in checks:
                if value > threshold:
                    self.alerts.append({
                        'timestamp': timestamp,
                        'level': 'warning' if value < critical_from else 'critical',
                        'message': message,
                        'metric': metric,
                        'value': value
                    })
            if len(self.alerts) > self.max_alerts:
                self.alerts = self.alerts[-self.max_alerts:]

    def _trim_data(self):
        """Keep only the newest ``max_data_points`` samples of every series."""
        with self._lock:
            if len(self.timestamps) > self.max_data_points:
                keep = self.max_data_points
                self.timestamps = self.timestamps[-keep:]
                self.cpu_usage = self.cpu_usage[-keep:]
                self.memory_usage = self.memory_usage[-keep:]
                self.network_traffic = self.network_traffic[-keep:]
                self.response_times = self.response_times[-keep:]
                self.error_counts = self.error_counts[-keep:]

    def get_data_frame(self):
        """Return a consistent snapshot of all series as a DataFrame."""
        with self._lock:
            return pd.DataFrame({
                'timestamp': list(self.timestamps),
                'cpu_usage': list(self.cpu_usage),
                'memory_usage': list(self.memory_usage),
                'network_traffic': list(self.network_traffic),
                'response_time': list(self.response_times),
                'errors': list(self.error_counts)
            })

    def get_alerts_frame(self):
        """Return the alert log as a DataFrame (empty but with columns if none)."""
        with self._lock:
            snapshot = list(self.alerts)
        if not snapshot:
            return pd.DataFrame(columns=['timestamp', 'level', 'message', 'metric', 'value'])
        return pd.DataFrame(snapshot)

    def reset_data(self):
        """Discard all samples and alerts; return a status message."""
        with self._lock:
            self.cpu_usage = []
            self.memory_usage = []
            self.network_traffic = []
            self.response_times = []
            self.error_counts = []
            self.timestamps = []
            self.alerts = []
        return "所有数据已重置"
# Chart builders
def create_system_metrics_chart(data):
    """Return a Plotly line chart of CPU and memory usage over time.

    An empty figure is returned for empty ``data``. A dashed red line marks
    the 80 % level across the plotted time range.
    """
    fig = go.Figure()
    if len(data) == 0:
        return fig

    series = (
        ('cpu_usage', 'CPU使用率 (%)', '#1f77b4'),
        ('memory_usage', '内存使用率 (%)', '#ff7f0e'),
    )
    for column, label, colour in series:
        fig.add_trace(go.Scatter(
            x=data['timestamp'],
            y=data[column],
            mode='lines',
            name=label,
            line=dict(color=colour, width=2),
        ))

    horizontal_legend = dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
    )
    fig.update_layout(
        title='系统资源使用率',
        xaxis_title='时间',
        yaxis_title='使用率 (%)',
        legend=horizontal_legend,
        template='plotly_white',
        height=300,
        margin=dict(l=10, r=10, b=10, t=40),
    )

    # Warning line spanning the first to the last timestamp
    first_ts = data['timestamp'].iloc[0]
    last_ts = data['timestamp'].iloc[-1]
    fig.add_shape(
        type="line",
        x0=first_ts,
        y0=80,
        x1=last_ts,
        y1=80,
        line=dict(color="red", width=1, dash="dash"),
    )
    return fig
def create_network_chart(data):
    """Return a filled Plotly line chart of network traffic over time.

    An empty figure is returned for empty ``data``.
    """
    if not len(data):
        return go.Figure()

    traffic_trace = go.Scatter(
        x=data['timestamp'],
        y=data['network_traffic'],
        mode='lines',
        name='网络流量 (Mbps)',
        line=dict(color='#2ca02c', width=2),
        fill='tozeroy',
    )
    fig = go.Figure()
    fig.add_trace(traffic_trace)
    fig.update_layout(
        title='网络流量',
        xaxis_title='时间',
        yaxis_title='网络流量 (Mbps)',
        template='plotly_white',
        height=250,
        margin=dict(l=10, r=10, b=10, t=40),
    )
    return fig
def create_response_time_chart(data):
    """Return a Plotly chart of response time (line) and error count (bars).

    Errors use a secondary y-axis on the right whose upper bound is
    ``max(1.5 * max errors, 3)``. A dashed red line marks 500 ms. An empty
    figure is returned for empty ``data``.
    """
    if not len(data):
        return go.Figure()

    timeline = data['timestamp']
    latency_trace = go.Scatter(
        x=timeline,
        y=data['response_time'],
        mode='lines',
        name='响应时间 (ms)',
        line=dict(color='#d62728', width=2),
    )
    # Errors are drawn against the secondary axis
    error_trace = go.Bar(
        x=timeline,
        y=data['errors'],
        name='错误数',
        marker_color='rgba(128, 0, 128, 0.6)',
        yaxis='y2',
    )
    fig = go.Figure()
    fig.add_trace(latency_trace)
    fig.add_trace(error_trace)

    error_axis = dict(
        title='错误数',
        overlaying='y',
        side='right',
        range=[0, max(data['errors'].max() * 1.5, 3)],
    )
    horizontal_legend = dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
    )
    fig.update_layout(
        title='响应时间与错误数',
        xaxis_title='时间',
        yaxis_title='响应时间 (ms)',
        yaxis2=error_axis,
        legend=horizontal_legend,
        template='plotly_white',
        height=300,
        margin=dict(l=10, r=10, b=10, t=40),
    )

    # Warning line spanning the first to the last timestamp
    fig.add_shape(
        type="line",
        x0=timeline.iloc[0],
        y0=500,
        x1=timeline.iloc[-1],
        y1=500,
        line=dict(color="red", width=1, dash="dash"),
    )
    return fig
def create_summary_gauges(data):
    """Return a figure with three gauges for the latest CPU, memory and response time.

    Returns ``None`` for empty ``data``. Each gauge has a grey / yellow / red
    band and a red threshold marker at the start of the red band.
    """
    if len(data) == 0:
        return None

    def gauge(value, title, x_domain, bar_colour, yellow_from, red_from, axis_max):
        # One indicator trace; the three gauges differ only in these arguments.
        return go.Indicator(
            mode="gauge+number",
            value=value,
            title={'text': title},
            domain={'x': x_domain, 'y': [0, 1]},
            gauge={
                'axis': {'range': [0, axis_max]},
                'bar': {'color': bar_colour},
                'steps': [
                    {'range': [0, yellow_from], 'color': "lightgray"},
                    {'range': [yellow_from, red_from], 'color': "yellow"},
                    {'range': [red_from, axis_max], 'color': "red"}
                ],
                'threshold': {
                    'line': {'color': "red", 'width': 4},
                    'thickness': 0.75,
                    'value': red_from
                }
            }
        )

    # Most recent sample
    latest = data.iloc[-1]
    fig = go.Figure()
    fig.add_trace(gauge(latest['cpu_usage'], "CPU使用率", [0, 0.32], "#1f77b4", 60, 80, 100))
    fig.add_trace(gauge(latest['memory_usage'], "内存使用率", [0.34, 0.66], "#ff7f0e", 60, 75, 100))
    fig.add_trace(gauge(latest['response_time'], "响应时间(ms)", [0.68, 1], "#d62728", 300, 500, 1000))
    fig.update_layout(
        height=200,
        margin=dict(l=10, r=10, b=0, t=30),
    )
    return fig
# Create the simulator (one sample per second once started)
simulator = DataSimulator(update_interval=1)
# Initial state variables: empty frames with the expected columns, and charts
# built from the empty data
monitor_running = False
data = pd.DataFrame(columns=['timestamp', 'cpu_usage', 'memory_usage', 'network_traffic', 'response_time', 'errors'])
alerts = pd.DataFrame(columns=['timestamp', 'level', 'message', 'metric', 'value'])
selected_tab = "概览"
summary_gauges = None
system_metrics_chart = create_system_metrics_chart(data)
network_chart = create_network_chart(data)
response_chart = create_response_time_chart(data)
# Dashboard refresh period passed to the timer in on_init
refresh_interval = 2
# Callbacks
def toggle_monitoring(state):
    """Start or stop the simulator so it matches the monitoring toggle.

    This callback is wired as ``on_change`` of the control bound to
    ``monitor_running``. By the time it runs, the bound variable already
    holds the newly selected value. The previous version treated it as the
    old value and inverted it, so switching monitoring on stopped the
    simulator and flipped the toggle back. The variable is now read as the
    desired state and is not reassigned.
    """
    if state.monitor_running:
        result = simulator.start()
    else:
        result = simulator.stop()
    notify(state, "info", result)
def reset_data(state):
    """Clear the simulator's samples and alerts, notify the user and refresh the dashboard."""
    message = simulator.reset_data()
    notify(state, "info", message)
    update_dashboard(state)
def update_dashboard(state):
    """Copy the simulator's current data and alerts into ``state`` and rebuild the charts.

    The charts are rebuilt only when there is at least one sample; with no
    data the previous chart objects are left as they are.
    """
    state.data = simulator.get_data_frame()
    state.alerts = simulator.get_alerts_frame()
    if state.data.empty:
        return
    # Rebuild every chart from the fresh samples
    state.summary_gauges = create_summary_gauges(state.data)
    state.system_metrics_chart = create_system_metrics_chart(state.data)
    state.network_chart = create_network_chart(state.data)
    state.response_chart = create_response_time_chart(state.data)
# Auto-refresh callbacks
def on_init(state):
    """Create the refresh timer with the module-level interval and start it.

    NOTE(review): ``tp.gui.Timer`` and its ``start(callback)`` signature are
    taken as-is from the original — confirm they exist in the installed Taipy
    version, and that ``refresh_thread`` may be assigned on ``state`` without
    being declared at module level.
    """
    timer = tp.gui.Timer(refresh_interval)
    state.refresh_thread = timer
    state.refresh_thread.start(update_dashboard)
def change_refresh_interval(state, value):
    """Store the new refresh interval and restart the refresh timer with it.

    NOTE(review): this is wired as ``on_change``. Confirm which arguments
    Taipy passes to a two-parameter callback; ``value`` may not be the slider
    value. ``tp.gui.Timer`` is also unverified.
    """
    state.refresh_interval = value
    timer_exists = hasattr(state, 'refresh_thread')
    if timer_exists:
        # Replace the running timer with one using the new period
        state.refresh_thread.stop()
        state.refresh_thread = tp.gui.Timer(value)
        state.refresh_thread.start(update_dashboard)
    notify(state, "info", f"刷新间隔已设置为 {value} 秒")
# Page layout: control bar, then overview / detailed metrics / alerts /
# settings tabs.
# NOTE(review): the `tabs` and `alert` elements, the dict-valued `lov` on the
# toggle and the `<|{condition}|` conditional blocks are assumed to be valid in
# the Taipy version in use — confirm. Figure objects are bound as the chart
# controls' default property; confirm whether `figure=` is required instead.
page = """
<|navbar|>
# 实时系统监控仪表板
<|layout|columns=8 4|
<|
<|{monitor_running}|toggle|on_change=toggle_monitoring|class_name=mb-3|lov={False: "🔴 监控已停止", True: "🟢 监控运行中"}|>
|>
<|
<|layout|columns=1 1|
<|更新仪表盘|button|on_action=update_dashboard|>
<|重置数据|button|on_action=reset_data|>
|>
|>
|>
<|tabs|lov=概览;详细指标;告警;设置|active={selected_tab}|>
<|{selected_tab=="概览"}|
## 系统概览
<|{len(data) > 0}|
<|{summary_gauges}|chart|>
<|layout|columns=1 1|
<|
### CPU & 内存使用率
<|{system_metrics_chart}|chart|>
|>
<|
### 网络流量
<|{network_chart}|chart|>
|>
|>
### 响应时间与错误
<|{response_chart}|chart|>
### 最新告警 (最近5条)
<|{len(alerts) > 0}|
<|{alerts.sort_values('timestamp', ascending=False).head(5)}|table|width=100%|>
|>
<|{len(alerts) == 0}|
*暂无告警信息*
|>
|>
<|{len(data) == 0}|
*暂无监控数据。请点击"监控运行中"按钮启动数据收集。*
|>
|>
<|{selected_tab=="详细指标"}|
## 详细系统指标
<|{len(data) > 0}|
<|layout|columns=1 1|
<|
### CPU使用率趋势
<|{system_metrics_chart}|chart|>
|>
<|
### 响应时间分布
<|{data}|chart|type=histogram|x=response_time|nbins=20|title=响应时间分布|>
|>
|>
<|layout|columns=1 1|
<|
### 每分钟错误数
<|{data.assign(minute=data['timestamp'].dt.floor('Min')).groupby('minute')['errors'].sum().reset_index()}|chart|type=bar|x=minute|y=errors|title=每分钟错误数|>
|>
<|
### 资源使用相关性
<|{data}|chart|type=scatter|x=cpu_usage|y=response_time|title=CPU使用率 vs 响应时间|>
|>
|>
### 原始数据
<|{data}|table|page_size=10|>
|>
<|{len(data) == 0}|
*暂无监控数据。请点击"监控运行中"按钮启动数据收集。*
|>
|>
<|{selected_tab=="告警"}|
## 系统告警
<|{len(alerts) > 0}|
### 告警统计
<|layout|columns=1 1 1|
<|
#### 总告警数
<|{len(alerts)}|text|raw|class_name=display-4|>
|>
<|
#### 严重告警数
<|{len(alerts[alerts['level'] == 'critical'])}|text|raw|class_name=display-4 text-danger|>
|>
<|
#### 警告告警数
<|{len(alerts[alerts['level'] == 'warning'])}|text|raw|class_name=display-4 text-warning|>
|>
|>
### 告警分布
<|layout|columns=1 1|
<|
#### 按指标分布
<|{alerts.groupby('metric').size().reset_index(name='count')}|chart|type=pie|labels=metric|values=count|title=告警指标分布|>
|>
<|
#### 按时间分布
<|{alerts.assign(minute=alerts['timestamp'].dt.floor('Min')).groupby('minute').size().reset_index(name='count')}|chart|type=bar|x=minute|y=count|title=每分钟告警数|>
|>
|>
### 详细告警列表
<|{alerts.sort_values('timestamp', ascending=False)}|table|width=100%|>
|>
<|{len(alerts) == 0}|
*暂无告警信息*
|>
|>
<|{selected_tab=="设置"}|
## 监控设置
### 刷新间隔
<|{refresh_interval}|slider|min=1|max=10|step=1|on_change=change_refresh_interval|>
当前刷新间隔: <|{refresh_interval}|> 秒
### 阈值设置
<|alert|type=info|
注意:此处仅为界面演示,实际设置未与模拟器连接。
|>
<|layout|columns=1 1 1|
<|
#### CPU告警阈值
<|80|slider|min=50|max=95|step=5|>
|>
<|
#### 内存告警阈值
<|75|slider|min=50|max=95|step=5|>
|>
<|
#### 响应时间告警阈值
<|500|slider|min=100|max=1000|step=50|>
|>
|>
<|保存设置|button|>
|>
"""
# Run the application
if __name__ == "__main__":
    gui = Gui(page)
    # NOTE(review): add_library is called with a plain string here; confirm
    # what argument type the installed Taipy version expects.
    gui.add_library("refresh_thread")
    gui.run(debug=True, on_init=on_init)
8. 高级技巧六:多页面企业级应用
Taipy适合构建复杂的多页面企业应用:
import taipy as tp
from taipy.gui import Gui, notify, navigate
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import random
import os
# 创建示例数据
def generate_sales_data(rows=1000):
    """Build a random sales table spanning the 365 days before now.

    Args:
        rows: Number of sales records to create.

    Returns:
        A DataFrame with one row per sale: timestamp, category, region,
        channel, quantity, unit price, derived revenue/cost/profit and
        calendar breakdown columns.
    """
    now = datetime.now()
    timestamps = pd.date_range(start=now - timedelta(days=365), end=now, periods=rows)

    category_pool = ['电子产品', '家居用品', '服装', '食品', '图书']
    region_pool = ['华北', '华东', '华南', '华中', '西北', '西南', '东北']
    channel_pool = ['线上', '门店', '批发', '代理']

    # Columns are drawn in a fixed order so a seeded NumPy RNG gives the
    # same table every time.
    frame = pd.DataFrame({'date': timestamps})
    frame['product_category'] = np.random.choice(category_pool, size=rows)
    frame['region'] = np.random.choice(region_pool, size=rows)
    frame['channel'] = np.random.choice(channel_pool, size=rows)
    frame['units_sold'] = np.random.randint(1, 50, size=rows)
    frame['unit_price'] = np.random.uniform(10, 1000, size=rows).round(2)

    # Derived money columns; 'cost' is per unit, so profit multiplies it
    # back by the quantity.
    frame['revenue'] = (frame['units_sold'] * frame['unit_price']).round(2)
    cost_ratio = np.random.uniform(0.4, 0.7, size=rows)
    frame['cost'] = (frame['unit_price'] * cost_ratio).round(2)
    frame['profit'] = (frame['revenue'] - (frame['cost'] * frame['units_sold'])).round(2)

    # Calendar breakdown of the sale timestamp.
    stamp = frame['date'].dt
    frame['year'] = stamp.year
    frame['month'] = stamp.month
    frame['quarter'] = stamp.quarter
    frame['month_name'] = stamp.month_name()
    frame['day_of_week'] = stamp.day_name()
    return frame
def generate_customer_data(rows=500):
    """Build a random customer table.

    Args:
        rows: Number of customers to create.

    Returns:
        A DataFrame with a sequential customer id, a registration date
        spread over the 1000 days before now, a province with a city
        belonging to it, and random segment/value attributes.
    """
    cities_by_province = {
        '北京': ['北京'],
        '上海': ['上海'],
        '广东': ['广州', '深圳', '东莞', '佛山', '珠海'],
        '江苏': ['南京', '苏州', '无锡', '常州', '镇江'],
        '浙江': ['杭州', '宁波', '温州', '嘉兴', '湖州'],
        '四川': ['成都', '绵阳', '德阳', '宜宾', '泸州'],
        '湖北': ['武汉', '宜昌', '襄阳', '荆州', '黄石'],
        '河南': ['郑州', '洛阳', '开封', '南阳', '许昌'],
    }
    province_pool = list(cities_by_province)
    type_pool = ['个人', '小型企业', '中型企业', '大型企业', '政府']
    level_pool = ['普通会员', '银卡会员', '金卡会员', '钻石会员', '未注册']
    frequency_pool = ['高', '中', '低', '极低']

    ids = [f'CUST{number:05d}' for number in range(1, rows + 1)]

    # Pick the province first, then a city that belongs to it.
    picked_provinces = np.random.choice(province_pool, size=rows)
    picked_cities = [np.random.choice(cities_by_province[name]) for name in picked_provinces]

    now = datetime.now()
    registered_on = pd.date_range(start=now - timedelta(days=1000), end=now, periods=rows)

    return pd.DataFrame({
        'customer_id': ids,
        'registration_date': registered_on,
        'province': picked_provinces,
        'city': picked_cities,
        'customer_type': np.random.choice(type_pool, size=rows),
        'membership_level': np.random.choice(level_pool, size=rows),
        'purchase_frequency': np.random.choice(frequency_pool, size=rows),
        'total_purchases': np.random.randint(1, 100, size=rows),
        'lifetime_value': np.random.uniform(100, 50000, size=rows).round(2),
    })
def generate_product_data(rows=200):
    """Build a random product catalogue.

    Args:
        rows: Number of products to create.

    Returns:
        A DataFrame with a sequential product id, a display name made of
        the sub-category plus an optional edition suffix, category data,
        supplier, launch date (spread over the 1500 days before now),
        price, cost, stock, status, rating and a ``profit_margin`` column
        in percent.
    """
    categories = ['电子产品', '家居用品', '服装', '食品', '图书']
    subcategories = {
        '电子产品': ['智能手机', '笔记本电脑', '平板电脑', '耳机', '智能手表', '相机'],
        '家居用品': ['家具', '厨具', '床上用品', '浴室用品', '装饰品'],
        '服装': ['上衣', '裤子', '裙子', '外套', '鞋类', '配饰'],
        '食品': ['零食', '饮料', '乳制品', '肉类', '蔬果', '调味品'],
        '图书': ['小说', '教育', '科技', '艺术', '生活', '历史'],
    }
    suppliers = ['供应商A', '供应商B', '供应商C', '供应商D', '供应商E']
    statuses = ['在售', '缺货', '停产', '预售']
    editions = ["专业版", "标准版", "入门版", "豪华版", ""]

    product_ids = [f'PROD{i:04d}' for i in range(1, rows + 1)]

    # Pick the category first, then a sub-category that belongs to it.
    category_choices = np.random.choice(categories, size=rows)
    subcategory_list = [np.random.choice(subcategories[category]) for category in category_choices]

    end_date = datetime.now()
    start_date = end_date - timedelta(days=1500)
    launch_dates = pd.date_range(start=start_date, end=end_date, periods=rows)

    # The empty edition would otherwise leave a dangling space at the end
    # of the name ("耳机 "), so the joined name is stripped.
    product_names = [f'{sub} {random.choice(editions)}'.strip() for sub in subcategory_list]

    data = pd.DataFrame({
        'product_id': product_ids,
        'product_name': product_names,
        'category': category_choices,
        'subcategory': subcategory_list,
        'supplier': np.random.choice(suppliers, size=rows),
        'launch_date': launch_dates,
        'price': np.random.uniform(10, 1000, size=rows).round(2),
        'cost': np.random.uniform(5, 800, size=rows).round(2),
        'stock': np.random.randint(0, 1000, size=rows),
        'status': np.random.choice(statuses, size=rows, p=[0.7, 0.1, 0.1, 0.1]),
        'rating': np.random.uniform(1, 5, size=rows).round(1),
    })

    # Margin as a percentage of the selling price. Price and cost are drawn
    # independently, so negative margins are possible.
    data['profit_margin'] = ((data['price'] - data['cost']) / data['price'] * 100).round(1)
    return data
# Load each data set from its CSV cache, or generate and cache it on first run
def _load_or_generate(csv_path, date_column, generator, rows):
    """Return the cached frame at *csv_path*, creating the cache if absent.

    When the file exists it is read and *date_column* is parsed to
    datetimes; otherwise ``generator(rows)`` is called and its result is
    written to *csv_path* without the index.
    """
    if not os.path.exists(csv_path):
        frame = generator(rows)
        frame.to_csv(csv_path, index=False)
        return frame
    frame = pd.read_csv(csv_path)
    frame[date_column] = pd.to_datetime(frame[date_column])
    return frame


sales_data = _load_or_generate('sales_data.csv', 'date', generate_sales_data, 2000)
customer_data = _load_or_generate('customer_data.csv', 'registration_date', generate_customer_data, 800)
product_data = _load_or_generate('product_data.csv', 'launch_date', generate_product_data, 300)
# Pre-compute per-year slices and headline totals
current_year = datetime.now().year
last_year = current_year - 1

_sales_year = sales_data['year']
sales_current_year = sales_data[_sales_year == current_year]
sales_last_year = sales_data[_sales_year == last_year]

total_revenue = sales_data['revenue'].sum()
total_profit = sales_data['profit'].sum()
total_customers = len(customer_data)
total_products = len(product_data)
# 创建图表函数
def create_revenue_by_category():
    """Build a bar chart of total revenue per product category.

    Reads the module-level ``sales_data`` frame.

    Returns:
        The Plotly Express bar figure, one coloured bar per category.
    """
    totals = sales_data.groupby('product_category', as_index=False)['revenue'].sum()
    axis_labels = {'product_category': '产品类别', 'revenue': '收入'}
    return px.bar(
        totals,
        x='product_category',
        y='revenue',
        title='按产品类别的收入',
        labels=axis_labels,
        color='product_category',
        template='plotly_white',
    )
def create_sales_trend():
    """Build a line chart of revenue summed per calendar month.

    Reads the module-level ``sales_data`` frame.

    Returns:
        The Plotly Express line figure.
    """
    month_key = sales_data['date'].dt.to_period('M')
    per_month = sales_data.groupby([month_key])['revenue'].sum().reset_index()
    # Plotly needs real timestamps on the x axis, not Period objects.
    per_month['date'] = per_month['date'].dt.to_timestamp()
    return px.line(
        per_month,
        x='date',
        y='revenue',
        title='月度销售趋势',
        labels={'date': '日期', 'revenue': '收入'},
        template='plotly_white',
    )
def create_region_map():
    """Build the per-region revenue chart.

    Despite the name this is a plain bar chart, used in place of a map.
    Reads the module-level ``sales_data`` frame.

    Returns:
        The Plotly Express bar figure, one coloured bar per region.
    """
    per_region = sales_data.groupby('region', as_index=False)['revenue'].sum()
    chart = px.bar(
        per_region,
        x='region',
        y='revenue',
        title='按区域的销售额',
        labels={'region': '区域', 'revenue': '收入'},
        color='region',
        template='plotly_white',
    )
    return chart
def create_customer_analysis():
    """Build a bar chart of mean lifetime value per customer type.

    Reads the module-level ``customer_data`` frame.

    Returns:
        The Plotly Express bar figure.
    """
    mean_value = customer_data.groupby('customer_type', as_index=False)['lifetime_value'].mean()
    axis_labels = {'customer_type': '客户类型', 'lifetime_value': '平均终身价值'}
    return px.bar(
        mean_value,
        x='customer_type',
        y='lifetime_value',
        title='按客户类型的平均终身价值',
        labels=axis_labels,
        color='customer_type',
        template='plotly_white',
    )
def create_product_performance():
    """Build a bar chart of the ten products with the highest profit margin.

    Reads the module-level ``product_data`` frame.

    Returns:
        The Plotly Express bar figure, coloured by category, with the x
        axis ordered by descending bar total.
    """
    ranked = product_data.sort_values('profit_margin', ascending=False)
    top_ten = ranked.head(10)
    chart = px.bar(
        top_ten,
        x='product_name',
        y='profit_margin',
        title='利润率最高的10个产品',
        labels={'product_name': '产品名称', 'profit_margin': '利润率 (%)'},
        color='category',
        template='plotly_white',
    )
    chart.update_layout(xaxis={'categoryorder': 'total descending'})
    return chart
# Build the charts shown before any filter has been applied
revenue_by_category_chart = create_revenue_by_category()
sales_trend_chart = create_sales_trend()
region_map_chart = create_region_map()
customer_analysis_chart = create_customer_analysis()
product_performance_chart = create_product_performance()

# Current filter selections (bound to the selectors on the dashboard)
filter_year = datetime.now().year
filter_category = "所有类别"
filter_region = "所有区域"
filter_channel = "所有渠道"

# Selector options: the "all" entry first, then the distinct values sorted
categories = ["所有类别", *sorted(sales_data['product_category'].unique().tolist())]
regions = ["所有区域", *sorted(sales_data['region'].unique().tolist())]
channels = ["所有渠道", *sorted(sales_data['channel'].unique().tolist())]
# 过滤器回调函数
def apply_filters(state):
    """Filter the sales data by the current selections and refresh the view.

    Reads ``filter_year``, ``filter_category``, ``filter_region`` and
    ``filter_channel`` from *state*, stores the filtered frame and the
    derived KPIs (revenue, profit, units, profit margin in percent) back
    on *state*, rebuilds the charts and notifies the user.

    Args:
        state: The Taipy state of the client that triggered the action.
    """
    filtered_data = sales_data.copy()

    # The year selector's options are declared as text in the page
    # ("2023"), while the initial value and the 'year' column are integers.
    # Comparing the column with a string matches nothing, so the selection
    # is normalised to int first.
    if state.filter_year != "所有年份":
        try:
            selected_year = int(state.filter_year)
        except (TypeError, ValueError):
            notify(state, "error", f"无效的年份: {state.filter_year}")
            return
        filtered_data = filtered_data[filtered_data['year'] == selected_year]

    if state.filter_category != "所有类别":
        filtered_data = filtered_data[filtered_data['product_category'] == state.filter_category]

    if state.filter_region != "所有区域":
        filtered_data = filtered_data[filtered_data['region'] == state.filter_region]

    if state.filter_channel != "所有渠道":
        filtered_data = filtered_data[filtered_data['channel'] == state.filter_channel]

    state.filtered_sales_data = filtered_data

    # KPIs are computed on locals and then published.
    revenue = filtered_data['revenue'].sum()
    profit = filtered_data['profit'].sum()
    state.filtered_revenue = revenue
    state.filtered_profit = profit
    state.filtered_units = filtered_data['units_sold'].sum()
    # Guard against division by zero when the filter leaves no rows.
    state.profit_margin = (profit / revenue * 100) if revenue > 0 else 0

    update_charts(state)
    notify(state, "success", "过滤器已应用")
def update_charts(state):
    """Rebuild the three filter-dependent charts from the filtered sales data.

    Reads ``state.filtered_sales_data`` and replaces
    ``state.revenue_by_category_chart``, ``state.sales_trend_chart`` and
    ``state.region_map_chart``.

    Args:
        state: The Taipy state whose charts should be refreshed.
    """
    frame = state.filtered_sales_data

    # Revenue per product category.
    per_category = frame.groupby('product_category')['revenue'].sum().reset_index()
    category_fig = px.bar(
        per_category,
        x='product_category',
        y='revenue',
        title='按产品类别的收入',
        labels={'product_category': '产品类别', 'revenue': '收入'},
        color='product_category',
        template='plotly_white',
    )
    state.revenue_by_category_chart = category_fig

    # Revenue per calendar month; periods are converted to timestamps for
    # plotting.
    month_key = frame['date'].dt.to_period('M')
    per_month = frame.groupby([month_key])['revenue'].sum().reset_index()
    per_month['date'] = per_month['date'].dt.to_timestamp()
    trend_fig = px.line(
        per_month,
        x='date',
        y='revenue',
        title='月度销售趋势',
        labels={'date': '日期', 'revenue': '收入'},
        template='plotly_white',
    )
    state.sales_trend_chart = trend_fig

    # Revenue per region.
    per_region = frame.groupby('region')['revenue'].sum().reset_index()
    region_fig = px.bar(
        per_region,
        x='region',
        y='revenue',
        title='按区域的销售额',
        labels={'region': '区域', 'revenue': '收入'},
        color='region',
        template='plotly_white',
    )
    state.region_map_chart = region_fig
# Start with the unfiltered data and the KPIs derived from it
filtered_sales_data = sales_data.copy()
filtered_revenue = filtered_sales_data['revenue'].sum()
filtered_profit = filtered_sales_data['profit'].sum()
filtered_units = filtered_sales_data['units_sold'].sum()
if filtered_revenue > 0:
    profit_margin = filtered_profit / filtered_revenue * 100
else:
    profit_margin = 0

# Page / tab selection state
current_page = "dashboard"
active_dashboard_tab = "概览"
active_sales_tab = "销售分析"
active_product_tab = "产品分析"
active_customer_tab = "客户分析"
# 定义页面布局
# Taipy Markdown template of the main dashboard: KPI row, filter row and
# four tabs (overview, sales, products, customers).
# Every pie chart names its category column with `labels=`, as the other
# pie charts in this file do, and every control sits on a single line.
# The lifetime-value histogram names its columns explicitly
# (value_range / count) so the chart's x/y bindings resolve.
# NOTE(review): the year selector's options are hard-coded while the data
# is generated relative to the current date — confirm they still match.
dashboard_page = """
<|navbar|>
# 企业销售分析仪表板
<|layout|columns=1 1 1 1|
<|
### 总收入
<|{filtered_revenue:,.2f}|text|format=$,.2f|class_name=display-6 fw-bold text-primary|>
|>
<|
### 总利润
<|{filtered_profit:,.2f}|text|format=$,.2f|class_name=display-6 fw-bold text-success|>
|>
<|
### 销售数量
<|{filtered_units:,}|text|format=,.0f|class_name=display-6 fw-bold text-info|>
|>
<|
### 利润率
<|{profit_margin:.1f}%|text|class_name=display-6 fw-bold text-danger|>
|>
|>
## 过滤器
<|layout|columns=1 1 1 1|
<|
**年份**
<|{filter_year}|selector|lov=所有年份;2023;2022;2021|>
|>
<|
**产品类别**
<|{filter_category}|selector|lov={categories}|>
|>
<|
**区域**
<|{filter_region}|selector|lov={regions}|>
|>
<|
**渠道**
<|{filter_channel}|selector|lov={channels}|>
|>
|>
<|应用过滤器|button|on_action=apply_filters|>
<|tabs|lov=概览;销售分析;产品分析;客户分析|active={active_dashboard_tab}|>
<|{active_dashboard_tab=="概览"}|
## 业务概览
<|layout|columns=1 1|
<|
### 按类别的收入
<|{revenue_by_category_chart}|chart|height=400px|>
|>
<|
### 月度销售趋势
<|{sales_trend_chart}|chart|height=400px|>
|>
|>
<|layout|columns=2 1|
<|
### 按区域的销售额
<|{region_map_chart}|chart|height=400px|>
|>
<|
### 产品表现
<|{product_performance_chart}|chart|height=400px|>
|>
|>
|>
<|{active_dashboard_tab=="销售分析"}|
## 销售分析
<|tabs|lov=趋势分析;区域分析;渠道分析|active={active_sales_tab}|>
<|{active_sales_tab=="趋势分析"}|
### 销售趋势分析
<|layout|columns=2 1|
<|
#### 月度销售趋势
<|{sales_trend_chart}|chart|height=400px|>
|>
<|
#### 按季度的销售额
<|{filtered_sales_data.groupby(['year', 'quarter'])['revenue'].sum().reset_index()}|chart|type=bar|x=quarter|y=revenue|color=year|barmode=group|title=按季度的销售额|>
|>
|>
<|layout|columns=1 1|
<|
#### 按周的销售量
<|{filtered_sales_data.groupby(filtered_sales_data['date'].dt.isocalendar().week)['units_sold'].sum().reset_index(name='units_sold').rename(columns={'date': 'week'})}|chart|type=bar|x=week|y=units_sold|title=按周销售量|>
|>
<|
#### 按日的平均销售额
<|{filtered_sales_data.groupby('day_of_week')['revenue'].mean().reset_index()}|chart|type=bar|x=day_of_week|y=revenue|title=每日平均销售额|>
|>
|>
|>
<|{active_sales_tab=="区域分析"}|
### 区域销售分析
<|layout|columns=1 1|
<|
#### 按区域的销售额
<|{region_map_chart}|chart|height=400px|>
|>
<|
#### 按区域的产品类别销售额
<|{filtered_sales_data.groupby(['region', 'product_category'])['revenue'].sum().reset_index()}|chart|type=bar|x=region|y=revenue|color=product_category|title=按区域和产品类别的销售额|>
|>
|>
<|layout|columns=1 1|
<|
#### 区域利润率对比
<|{filtered_sales_data.groupby('region').apply(lambda x: (x['profit'].sum() / x['revenue'].sum() * 100)).reset_index(name='profit_margin')}|chart|type=bar|x=region|y=profit_margin|title=区域利润率(%)|>
|>
<|
#### 按月度的区域销售额
<|{filtered_sales_data.assign(month_year=filtered_sales_data['date'].dt.strftime('%Y-%m')).groupby(['month_year', 'region'])['revenue'].sum().reset_index().sort_values('month_year')}|chart|type=line|x=month_year|y=revenue|color=region|title=月度区域销售额|>
|>
|>
|>
<|{active_sales_tab=="渠道分析"}|
### 销售渠道分析
<|layout|columns=1 1|
<|
#### 按渠道的销售额
<|{filtered_sales_data.groupby('channel')['revenue'].sum().reset_index()}|chart|type=pie|labels=channel|values=revenue|title=按渠道的销售额占比|>
|>
<|
#### 渠道月度趋势
<|{filtered_sales_data.assign(month_year=filtered_sales_data['date'].dt.strftime('%Y-%m')).groupby(['month_year', 'channel'])['revenue'].sum().reset_index().sort_values('month_year')}|chart|type=line|x=month_year|y=revenue|color=channel|title=渠道月度销售趋势|>
|>
|>
<|layout|columns=1 1|
<|
#### 渠道与产品类别
<|{filtered_sales_data.groupby(['channel', 'product_category'])['revenue'].sum().reset_index()}|chart|type=bar|x=channel|y=revenue|color=product_category|title=渠道与产品类别销售额|>
|>
<|
#### 渠道利润率对比
<|{filtered_sales_data.groupby('channel').apply(lambda x: (x['profit'].sum() / x['revenue'].sum() * 100)).reset_index(name='profit_margin')}|chart|type=bar|x=channel|y=profit_margin|title=渠道利润率(%)|>
|>
|>
|>
|>
<|{active_dashboard_tab=="产品分析"}|
## 产品分析
<|tabs|lov=产品表现;产品详情;库存分析|active={active_product_tab}|>
<|{active_product_tab=="产品表现"}|
### 产品表现分析
<|layout|columns=1 1|
<|
#### 利润率最高的产品
<|{product_performance_chart}|chart|height=400px|>
|>
<|
#### 销售量最高的产品
<|{filtered_sales_data.groupby('product_category')['units_sold'].sum().reset_index().sort_values('units_sold', ascending=False)}|chart|type=bar|x=product_category|y=units_sold|title=销售量最高的产品类别|>
|>
|>
<|layout|columns=1 1|
<|
#### 产品类别月度趋势
<|{filtered_sales_data.assign(month_year=filtered_sales_data['date'].dt.strftime('%Y-%m')).groupby(['month_year', 'product_category'])['revenue'].sum().reset_index().sort_values('month_year')}|chart|type=line|x=month_year|y=revenue|color=product_category|title=产品类别月度销售趋势|>
|>
<|
#### 产品类别与渠道
<|{filtered_sales_data.groupby(['product_category', 'channel'])['revenue'].sum().reset_index()}|chart|type=bar|x=product_category|y=revenue|color=channel|title=产品类别与渠道销售额|>
|>
|>
|>
<|{active_product_tab=="产品详情"}|
### 产品详细信息
#### 产品数据表
<|{product_data}|table|height=500px|page_size=10|>
#### 按供应商的产品分布
<|{product_data.groupby(['supplier', 'category'])['product_id'].count().reset_index().rename(columns={'product_id': 'count'})}|chart|type=bar|x=supplier|y=count|color=category|title=按供应商的产品分布|>
|>
<|{active_product_tab=="库存分析"}|
### 库存分析
<|layout|columns=1 1|
<|
#### 按产品类别的库存水平
<|{product_data.groupby('category')['stock'].sum().reset_index()}|chart|type=bar|x=category|y=stock|title=按类别的总库存|>
|>
<|
#### 库存状态分布
<|{product_data.groupby('status')['product_id'].count().reset_index().rename(columns={'product_id': 'count'})}|chart|type=pie|labels=status|values=count|title=产品状态分布|>
|>
|>
<|layout|columns=1 1|
<|
#### 库存不足产品
<|{product_data[product_data['stock'] < 50].sort_values('stock')}|table|height=300px|page_size=5|>
|>
<|
#### 库存过高产品
<|{product_data[product_data['stock'] > 500].sort_values('stock', ascending=False)}|table|height=300px|page_size=5|>
|>
|>
|>
|>
<|{active_dashboard_tab=="客户分析"}|
## 客户分析
<|tabs|lov=客户概况;区域分布;价值分析|active={active_customer_tab}|>
<|{active_customer_tab=="客户概况"}|
### 客户概况分析
<|layout|columns=1 1|
<|
#### 按客户类型的分布
<|{customer_data.groupby('customer_type')['customer_id'].count().reset_index().rename(columns={'customer_id': 'count'})}|chart|type=pie|labels=customer_type|values=count|title=客户类型分布|>
|>
<|
#### 按会员等级的分布
<|{customer_data.groupby('membership_level')['customer_id'].count().reset_index().rename(columns={'customer_id': 'count'})}|chart|type=pie|labels=membership_level|values=count|title=会员等级分布|>
|>
|>
<|layout|columns=1 1|
<|
#### 按购买频率的客户分布
<|{customer_data.groupby('purchase_frequency')['customer_id'].count().reset_index().rename(columns={'customer_id': 'count'})}|chart|type=bar|x=purchase_frequency|y=count|title=购买频率分布|>
|>
<|
#### 客户类型与终身价值
<|{customer_analysis_chart}|chart|height=400px|>
|>
|>
#### 新注册客户趋势
<|{customer_data.assign(month_year=customer_data['registration_date'].dt.strftime('%Y-%m')).groupby('month_year')['customer_id'].count().reset_index().rename(columns={'customer_id': 'new_customers'}).sort_values('month_year')}|chart|type=line|x=month_year|y=new_customers|title=月度新客户注册趋势|>
|>
<|{active_customer_tab=="区域分布"}|
### 客户区域分布
<|layout|columns=1 1|
<|
#### 按省份的客户分布
<|{customer_data.groupby('province')['customer_id'].count().reset_index().rename(columns={'customer_id': 'count'})}|chart|type=bar|x=province|y=count|title=省份客户分布|>
|>
<|
#### 省份客户终身价值
<|{customer_data.groupby('province')['lifetime_value'].mean().reset_index()}|chart|type=bar|x=province|y=lifetime_value|title=省份平均客户终身价值|>
|>
|>
#### 热门城市TOP 10
<|{customer_data.groupby('city')['customer_id'].count().reset_index().rename(columns={'customer_id': 'count'}).sort_values('count', ascending=False).head(10)}|chart|type=bar|x=city|y=count|title=客户数量前10城市|>
#### 区域与客户类型交叉分析
<|{customer_data.groupby(['province', 'customer_type'])['customer_id'].count().reset_index().rename(columns={'customer_id': 'count'})}|chart|type=bar|x=province|y=count|color=customer_type|title=省份与客户类型分布|>
|>
<|{active_customer_tab=="价值分析"}|
### 客户价值分析
<|layout|columns=1 1|
<|
#### 按终身价值的客户分布
<|{pd.cut(customer_data['lifetime_value'], bins=[0, 1000, 5000, 10000, 20000, 50000], labels=['0-1k', '1k-5k', '5k-10k', '10k-20k', '20k+']).value_counts(sort=False).rename_axis('value_range').reset_index(name='count')}|chart|type=bar|x=value_range|y=count|title=客户终身价值分布|>
|>
<|
#### 会员等级与终身价值
<|{customer_data.groupby('membership_level')['lifetime_value'].mean().reset_index()}|chart|type=bar|x=membership_level|y=lifetime_value|title=会员等级平均终身价值|>
|>
|>
<|layout|columns=1 1|
<|
#### 客户类型与购买频率
<|{customer_data.groupby(['customer_type', 'purchase_frequency'])['customer_id'].count().reset_index().rename(columns={'customer_id': 'count'})}|chart|type=bar|x=customer_type|y=count|color=purchase_frequency|title=客户类型与购买频率|>
|>
<|
#### 最有价值的TOP 20客户
<|{customer_data.sort_values('lifetime_value', ascending=False).head(20)}|table|height=400px|page_size=10|>
|>
|>
|>
|>
|>
"""
# Taipy Markdown template of the sales report page: three KPI tiles
# (revenue, profit, margin), the filtered sales table, two charts and a
# button that triggers go_to_dashboard.
# NOTE(review): the KPI text controls pass format=$,.2f on values that the
# expression has already formatted — confirm the format property is
# honoured (or needed) here.
sales_page = """
<|navbar|>
# 销售报表详情
<|layout|columns=1 1 1|
<|
### 总收入
<|{filtered_revenue:,.2f}|text|format=$,.2f|class_name=display-6 fw-bold text-primary|>
|>
<|
### 总利润
<|{filtered_profit:,.2f}|text|format=$,.2f|class_name=display-6 fw-bold text-success|>
|>
<|
### 利润率
<|{profit_margin:.1f}%|text|class_name=display-6 fw-bold text-danger|>
|>
|>
## 销售数据表
<|{filtered_sales_data}|table|height=500px|page_size=10|>
## 销售图表分析
<|layout|columns=1 1|
<|
### 月度销售趋势
<|{sales_trend_chart}|chart|height=400px|>
|>
<|
### 按类别的收入
<|{revenue_by_category_chart}|chart|height=400px|>
|>
|>
<|返回仪表板|button|on_action=go_to_dashboard|>
"""
# Taipy Markdown template of the product report page: the product table,
# category/status pie charts, the top-margin chart, a per-supplier bar
# chart and a button that triggers go_to_dashboard.
# Pie charts name their category column with `labels=`, matching the other
# pie charts in this file.
products_page = """
<|navbar|>
# 产品详情报表
## 产品数据
<|{product_data}|table|height=400px|page_size=10|>
## 产品分析
<|layout|columns=1 1|
<|
### 类别分布
<|{product_data.groupby('category')['product_id'].count().reset_index().rename(columns={'product_id': 'count'})}|chart|type=pie|labels=category|values=count|title=产品类别分布|>
|>
<|
### 利润率最高的产品
<|{product_performance_chart}|chart|height=400px|>
|>
|>
<|layout|columns=1 1|
<|
### 库存状态分布
<|{product_data.groupby('status')['product_id'].count().reset_index().rename(columns={'product_id': 'count'})}|chart|type=pie|labels=status|values=count|title=产品状态分布|>
|>
<|
### 按供应商分布
<|{product_data.groupby('supplier')['product_id'].count().reset_index().rename(columns={'product_id': 'count'})}|chart|type=bar|x=supplier|y=count|title=供应商产品数量|>
|>
|>
<|返回仪表板|button|on_action=go_to_dashboard|>
"""
# Taipy Markdown template of the customer report page: the customer table,
# type/membership pie charts, purchase-frequency and province bar charts
# and a button that triggers go_to_dashboard.
# Pie charts name their category column with `labels=`, matching the other
# pie charts in this file.
customers_page = """
<|navbar|>
# 客户详情报表
## 客户数据
<|{customer_data}|table|height=400px|page_size=10|>
## 客户分析
<|layout|columns=1 1|
<|
### 客户类型分布
<|{customer_data.groupby('customer_type')['customer_id'].count().reset_index().rename(columns={'customer_id': 'count'})}|chart|type=pie|labels=customer_type|values=count|title=客户类型分布|>
|>
<|
### 会员等级分布
<|{customer_data.groupby('membership_level')['customer_id'].count().reset_index().rename(columns={'customer_id': 'count'})}|chart|type=pie|labels=membership_level|values=count|title=会员等级分布|>
|>
|>
<|layout|columns=1 1|
<|
### 购买频率分布
<|{customer_data.groupby('purchase_frequency')['customer_id'].count().reset_index().rename(columns={'customer_id': 'count'})}|chart|type=bar|x=purchase_frequency|y=count|title=购买频率分布|>
|>
<|
### 客户地域分布
<|{customer_data.groupby('province')['customer_id'].count().reset_index().rename(columns={'customer_id': 'count'}).sort_values('count', ascending=False)}|chart|type=bar|x=province|y=count|title=省份客户分布|>
|>
|>
<|返回仪表板|button|on_action=go_to_dashboard|>
"""
# Route name -> page template, handed to Gui(pages=...)
pages = dict(
    dashboard=dashboard_page,
    sales=sales_page,
    products=products_page,
    customers=customers_page,
)
# 导航回调函数
def go_to_dashboard(state):
    """Navigate the calling client to the "dashboard" page."""
    navigate(state, "dashboard")
def go_to_sales(state):
    """Navigate the calling client to the "sales" page."""
    navigate(state, "sales")
def go_to_products(state):
    """Navigate the calling client to the "products" page."""
    navigate(state, "products")
def go_to_customers(state):
    """Navigate the calling client to the "customers" page."""
    navigate(state, "customers")
# Navigation label -> callback mapping
# NOTE(review): nothing in the visible code reads this dict (the page
# templates use the bare <|navbar|> control and Gui() is not given it) —
# confirm whether it is still needed.
navbar = {
    "销售仪表板": go_to_dashboard,
    "销售报表": go_to_sales,
    "产品分析": go_to_products,
    "客户分析": go_to_customers
}
# Run the application
if __name__ == "__main__":
    gui = Gui(pages=pages)
    # title and dark_mode are run-time configuration settings, so they are
    # passed to run() rather than to the Gui constructor, which does not
    # take them.
    gui.run(title="企业销售分析系统", dark_mode=False, debug=True)
9. Taipy在数据科学工作流中的应用
Taipy非常适合数据科学家创建交互式分析工具:
import taipy as tp
from taipy.gui import Gui, notify
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.datasets import load_iris, load_wine, load_breast_cancer
import plotly.express as px
import plotly.graph_objects as go
import io
import base64
# Configuration
# Display name -> scikit-learn loader function (called with no arguments
# by load_dataset).
available_datasets = {
    "鸢尾花": load_iris,
    "葡萄酒": load_wine,
    "乳腺癌": load_breast_cancer
}
# Display name -> scikit-learn classifier class (instantiated by
# train_model with a random_state argument).
available_models = {
    "随机森林": RandomForestClassifier,
    "逻辑回归": LogisticRegression,
    "支持向量机": SVC,
    "梯度提升树": GradientBoostingClassifier
}
# Initial user selections
selected_dataset = "鸢尾花"
selected_model = "随机森林"
test_size = 0.3
random_state = 42

# State variables: every name that a page binds to or a callback assigns on
# `state` is declared here first.
X = None
y = None
X_train = None
X_test = None
y_train = None
y_test = None
clf = None
feature_names = None
target_names = None
accuracy = 0
classification_rep = None
conf_matrix = None
feature_importance = None
train_time = 0
selected_features = []
scatter_x = None
scatter_y = None
scatter_color = None
scatter_plot = None
dataset_info = None
correlation_matrix = None
data_df = None
data_description = None

# Results written by train_and_evaluate and bound in the evaluation tab.
# They were previously assigned on `state` without being declared.
classification_report_df = None
confusion_matrix_plot = ""
feature_importance_chart = None
# 处理函数
def load_dataset(dataset_name):
    """Load one of the configured datasets and derive its exploration artefacts.

    Args:
        dataset_name: A key of ``available_datasets``.

    Returns:
        A 12-tuple ``(X, y, feature_names, target_names, dataset_info,
        data_df, data_description, correlation_matrix, scatter_x,
        scatter_y, scatter_color, scatter_plot)`` where ``scatter_color``
        is always ``'target_name'``.

    Raises:
        KeyError: If *dataset_name* is not a configured dataset.
    """
    dataset = available_datasets[dataset_name]()
    X = dataset.data
    y = dataset.target

    # Some loaders return the names as NumPy arrays rather than lists.
    # Callers use list-only operations on them (.index(), truthiness), so
    # both are normalised to plain lists of str.
    if hasattr(dataset, 'feature_names'):
        feature_names = [str(name) for name in dataset.feature_names]
    else:
        feature_names = [f"feature_{i}" for i in range(X.shape[1])]
    if hasattr(dataset, 'target_names'):
        target_names = [str(name) for name in dataset.target_names]
    else:
        target_names = [f"class_{i}" for i in range(len(np.unique(y)))]

    # Full frame: features plus the numeric and the readable target.
    data_df = pd.DataFrame(X, columns=feature_names)
    data_df['target'] = y
    data_df['target_name'] = [target_names[i] for i in y]

    # Summary shown next to the dataset selector; only the first five
    # feature names are listed.
    dataset_info = {
        "样本数": X.shape[0],
        "特征数": X.shape[1],
        "类别数": len(target_names),
        "类别名称": ", ".join(target_names),
        "特征名称": ", ".join(feature_names[:5]) + ("..." if len(feature_names) > 5 else "")
    }

    data_description = data_df.describe().round(2)
    correlation_matrix = data_df.drop(['target', 'target_name'], axis=1).corr().round(2)

    # Default scatter axes: the first two features, or the only feature
    # twice.
    scatter_x = feature_names[0]
    scatter_y = feature_names[1] if X.shape[1] >= 2 else feature_names[0]

    scatter_plot = create_scatter_plot(data_df, scatter_x, scatter_y, 'target_name')
    return (X, y, feature_names, target_names, dataset_info, data_df, data_description,
            correlation_matrix, scatter_x, scatter_y, 'target_name', scatter_plot)
def create_scatter_plot(df, x_col, y_col, color_col):
    """Return a Plotly Express scatter of *y_col* against *x_col*, coloured by *color_col*.

    Args:
        df: Frame holding the three columns.
        x_col: Column plotted on the x axis.
        y_col: Column plotted on the y axis.
        color_col: Column that determines the marker colour.
    """
    heading = f"{x_col} vs {y_col} by {color_col}"
    return px.scatter(
        df,
        x=x_col,
        y=y_col,
        color=color_col,
        title=heading,
        template='plotly_white',
    )
def create_correlation_heatmap(corr_matrix):
    """Render a correlation matrix as an inline PNG heatmap.

    Args:
        corr_matrix: The matrix passed to ``seaborn.heatmap``.

    Returns:
        An HTML snippet: a centred ``<img>`` whose source is the PNG
        encoded as a base64 data URI.
    """
    fig = plt.figure(figsize=(10, 8))
    try:
        sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', linewidths=0.5)
        plt.title('特征相关性矩阵')
        with io.BytesIO() as buffer:
            fig.savefig(buffer, format='png', bbox_inches='tight')
            graphic = base64.b64encode(buffer.getvalue()).decode('utf-8')
    finally:
        # pyplot keeps every figure alive until it is closed; this function
        # runs on each dataset change, so the figure must be released.
        plt.close(fig)
    return f"<center><img src='data:image/png;base64,{graphic}'/></center>"
def train_model(X, y, model_name, test_size, random_state, feature_names=None):
    """Split, scale, fit and evaluate one of the configured classifiers.

    Args:
        X: Feature matrix.
        y: Target vector.
        model_name: A key of ``available_models``.
        test_size: Fraction of samples held out for testing.
        random_state: Seed for the split and the model; coerced to int
            unless it is None.
        feature_names: Optional names of the columns of *X*, used to label
            feature importances. When omitted, the module-level
            ``feature_names`` is used if its length matches; otherwise
            generic ``feature_<i>`` names are generated.

    Returns:
        ``(X_train, X_test, y_train, y_test, clf, accuracy,
        classification_rep, conf_matrix, feature_importance, train_time)``.
        ``feature_importance`` is a name -> importance dict, or None when
        the model exposes no ``feature_importances_``. ``train_time`` is
        the fit duration in seconds.
    """
    import time

    # The seed is edited through a UI number control; scikit-learn rejects
    # non-integer seeds, so it is coerced defensively.
    seed = None if random_state is None else int(random_state)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=seed
    )

    # The scaler is fitted on the training split only and applied to both.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model_class = available_models[model_name]
    clf = model_class(random_state=seed)

    start_time = time.perf_counter()
    clf.fit(X_train_scaled, y_train)
    train_time = time.perf_counter() - start_time

    y_pred = clf.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    classification_rep = classification_report(y_test, y_pred, output_dict=True)
    conf_matrix = confusion_matrix(y_test, y_pred)

    importances = getattr(clf, 'feature_importances_', None)
    if importances is None:
        feature_importance = None
    else:
        names = feature_names if feature_names is not None else globals().get('feature_names')
        # X may be a column subset of the dataset. Zipping with a list of a
        # different length would silently attach the wrong names, so fall
        # back to positional names instead.
        if names is None or len(names) != len(importances):
            names = [f"feature_{i}" for i in range(len(importances))]
        feature_importance = dict(zip(names, importances))

    return (X_train, X_test, y_train, y_test, clf, accuracy,
            classification_rep, conf_matrix, feature_importance, train_time)
# 更新函数
def update_dataset(state):
    """Reload the dataset chosen in ``state.selected_dataset`` and refresh the state.

    Every value returned by ``load_dataset`` is stored on *state*, all
    features are re-selected and the correlation heatmap is regenerated.
    Any failure is reported to the user as an error notification.
    """
    try:
        loaded = load_dataset(state.selected_dataset)
        (state.X, state.y, state.feature_names, state.target_names,
         state.dataset_info, state.data_df, state.data_description,
         state.correlation_matrix, state.scatter_x, state.scatter_y,
         state.scatter_color, state.scatter_plot) = loaded

        # A new dataset starts with all of its features selected.
        state.selected_features = state.feature_names.copy()

        heatmap_html = create_correlation_heatmap(state.correlation_matrix)
        state.correlation_heatmap = heatmap_html

        notify(state, "success", f"成功加载 {state.selected_dataset} 数据集")
    except Exception as e:
        notify(state, "error", f"加载数据集时出错: {str(e)}")
def update_scatter_plot(state):
    """Redraw the scatter plot from the axis and colour columns selected on *state*.

    Any failure is reported to the user as an error notification.
    """
    try:
        figure = create_scatter_plot(
            state.data_df,
            state.scatter_x,
            state.scatter_y,
            state.scatter_color,
        )
        state.scatter_plot = figure
        notify(state, "success", "散点图已更新")
    except Exception as e:
        notify(state, "error", f"更新散点图时出错: {str(e)}")
def train_and_evaluate(state):
    """Train the selected model on the selected features and publish the results.

    Stores the train/test split, the fitted model, accuracy, the
    classification report (raw and as a table), the confusion matrix (raw
    and as an inline PNG), feature importances (raw and as a chart, or
    None) and the training time on *state*. Any failure is reported to the
    user as an error notification.

    Args:
        state: The Taipy state of the client that triggered the action.
    """
    try:
        selected = list(state.selected_features or [])
        if not selected:
            notify(state, "warning", "请至少选择一个特征")
            return

        # list() makes .index() available even if the names are held in a
        # NumPy array.
        all_names = list(state.feature_names)
        column_idx = [all_names.index(name) for name in selected]
        X_subset = state.X[:, column_idx]

        (X_train, X_test, y_train, y_test, clf, accuracy,
         report, matrix, _, train_time) = train_model(
            X_subset, state.y, state.selected_model, state.test_size, state.random_state
        )

        # Importances follow the column order of X_subset, i.e. the order
        # of the selected features, so they are keyed by those names here.
        importances = getattr(clf, 'feature_importances_', None)
        feature_importance = None if importances is None else dict(zip(selected, importances))

        state.X_train, state.X_test = X_train, X_test
        state.y_train, state.y_test = y_train, y_test
        state.clf = clf
        state.accuracy = accuracy
        state.classification_rep = report
        state.conf_matrix = matrix
        state.feature_importance = feature_importance
        state.train_time = train_time

        # Classification report as a table: one row per class/aggregate.
        state.classification_report_df = pd.DataFrame(report).transpose().round(3)

        # Confusion matrix rendered to an inline PNG. The figure is always
        # closed so repeated training runs do not accumulate open figures.
        fig = plt.figure(figsize=(8, 6))
        try:
            sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues',
                        xticklabels=state.target_names, yticklabels=state.target_names)
            plt.title('混淆矩阵')
            plt.ylabel('真实标签')
            plt.xlabel('预测标签')
            with io.BytesIO() as buffer:
                fig.savefig(buffer, format='png', bbox_inches='tight')
                encoded = base64.b64encode(buffer.getvalue()).decode('utf-8')
        finally:
            plt.close(fig)
        state.confusion_matrix_plot = f"<center><img src='data:image/png;base64,{encoded}'/></center>"

        if feature_importance:
            importance_df = pd.DataFrame({
                'feature': list(feature_importance.keys()),
                'importance': list(feature_importance.values())
            }).sort_values('importance', ascending=False)
            state.feature_importance_chart = px.bar(
                importance_df, x='feature', y='importance',
                title='特征重要性',
                template='plotly_white'
            )
        else:
            state.feature_importance_chart = None

        notify(state, "success", f"{state.selected_model} 训练完成,准确率: {accuracy:.2f}")
    except Exception as e:
        notify(state, "error", f"模型训练时出错: {str(e)}")
# Load the default dataset so the first page render has data to show
(
    X,
    y,
    feature_names,
    target_names,
    dataset_info,
    data_df,
    data_description,
    correlation_matrix,
    scatter_x,
    scatter_y,
    scatter_color,
    scatter_plot,
) = load_dataset("鸢尾花")
selected_features = feature_names.copy()
correlation_heatmap = create_correlation_heatmap(correlation_matrix)

# Currently active tab
active_tab = "数据集探索"
# 页面布局
# Taipy Markdown template of the ML workbench: three tabs (dataset
# exploration, feature engineering, model training and evaluation) bound to
# the module-level state variables and to the update_dataset,
# update_scatter_plot and train_and_evaluate callbacks.
# NOTE(review): dataset_info is a dict of scalar values bound to a table
# control — confirm Taipy can render that shape.
# NOTE(review): the feature-distribution layout uses columns=2 while every
# other two-column layout in this file uses "1 1" — confirm this is intended.
page = """
<|navbar|>
# 机器学习交互式工作台
<|tabs|lov=数据集探索;特征工程;模型训练与评估|active={active_tab}|>
<|{active_tab=="数据集探索"}|
## 数据集探索
<|layout|columns=1 3|
<|
### 数据集选择
<|{selected_dataset}|selector|lov={list(available_datasets.keys())}|on_change=update_dataset|>
<|加载数据集|button|on_action=update_dataset|>
### 数据集信息
<|{dataset_info is not None}|
<|{dataset_info}|table|>
|>
|>
<|
### 数据预览
<|{data_df is not None}|
<|{data_df.head(10)}|table|width=100%|>
|>
|>
|>
<|{data_df is not None}|
<|layout|columns=1 1|
<|
### 统计描述
<|{data_description}|table|width=100%|>
|>
<|
### 特征相关性矩阵
<|{correlation_heatmap}|raw|>
|>
|>
### 数据可视化
<|layout|columns=3 1|
<|
#### 散点图配置
X轴特征: <|{scatter_x}|selector|lov={feature_names}|>
Y轴特征: <|{scatter_y}|selector|lov={feature_names}|>
颜色分类: <|{scatter_color}|selector|lov=target_name|>
<|更新散点图|button|on_action=update_scatter_plot|>
|>
<|
#### 类别分布
<|{data_df}|chart|type=pie|labels=target_name|title=目标类别分布|>
|>
|>
<|{scatter_plot}|chart|height=500px|>
|>
|>
<|{active_tab=="特征工程"}|
## 特征工程
<|{data_df is not None}|
### 特征选择
<|{selected_features}|multiselect|lov={feature_names}|>
### 特征分布
<|layout|columns=2|
<|
#### 特征直方图
<|{data_df}|chart|type=histogram|x={feature_names[0] if feature_names else ""}|nbins=20|title=特征分布直方图|>
|>
<|
#### 特征箱形图
<|{data_df}|chart|type=box|x=target_name|y={feature_names[0] if feature_names else ""}|title=按类别的特征分布|>
|>
|>
### 特征相关性分析
<|{correlation_heatmap}|raw|>
|>
<|{data_df is None}|
请先在"数据集探索"选项卡中加载数据集。
|>
|>
<|{active_tab=="模型训练与评估"}|
## 模型训练与评估
<|{data_df is not None}|
<|layout|columns=1 1 1 1|
<|
### 模型选择
<|{selected_model}|selector|lov={list(available_models.keys())}|>
|>
<|
### 测试集比例
<|{test_size}|slider|min=0.1|max=0.5|step=0.05|>
|>
<|
### 随机种子
<|{random_state}|number|min=0|max=100|step=1|>
|>
<|
<|训练模型|button|on_action=train_and_evaluate|>
|>
|>
<|{accuracy > 0}|
<|layout|columns=1 1 1|
<|
### 模型准确率
<|{accuracy:.4f}|text|raw|class_name=display-4 text-success|>
|>
<|
### 训练时间
<|{train_time:.4f}|text|raw|> 秒
|>
<|
### 测试集大小
<|{X_test.shape[0]}|text|raw|> 个样本
|>
|>
### 分类报告
<|{classification_report_df}|table|width=100%|>
<|layout|columns=1 1|
<|
### 混淆矩阵
<|{confusion_matrix_plot}|raw|>
|>
<|
### 特征重要性
<|{feature_importance_chart}|chart|>
|>
|>
|>
<|{accuracy == 0}|
请选择模型选项并点击"训练模型"按钮。
|>
|>
<|{data_df is None}|
请先在"数据集探索"选项卡中加载数据集。
|>
|>
"""
# Run the application
if __name__ == "__main__":
    # Serve the single workbench page with debug mode enabled.
    gui = Gui(page)
    gui.run(debug=True)
10. 生产部署与企业应用
Taipy不仅适用于原型开发,也支持生产级部署:
import hashlib
import hmac
import json
import logging
import os
import secrets
import time
from datetime import datetime, timedelta

import markdown
import numpy as np
import pandas as pd
import plotly.express as px
import taipy as tp
import yaml
from taipy.gui import Gui
# Configure logging: every record goes both to app.log and to the console
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        # The log messages in this module are Chinese text; pin the file
        # encoding so writing them does not depend on the platform locale.
        logging.FileHandler("app.log", encoding="utf-8"),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)
# Configuration file handling
def load_config(config_path="config.yaml"):
    """Load application settings from a YAML file.

    Args:
        config_path: Path of the YAML configuration file.

    Returns:
        The parsed configuration as a dict. An empty dict is returned when
        the file is missing, cannot be read or parsed, or does not contain
        a mapping at its top level (an empty file parses to ``None``).
    """
    if not os.path.exists(config_path):
        logger.warning(f"配置文件 {config_path} 不存在,使用默认配置")
        return {}
    try:
        with open(config_path, 'r', encoding='utf-8') as file:
            config = yaml.safe_load(file)
    except Exception as e:
        # Best effort: a broken configuration must not stop the application
        logger.error(f"加载配置失败: {str(e)}")
        return {}
    if not isinstance(config, dict):
        # Callers use config.get(...), so anything but a mapping is unusable
        logger.warning(f"配置文件 {config_path} 内容为空或格式无效,使用默认配置")
        return {}
    logger.info(f"配置已从 {config_path} 加载")
    return config
# User authentication system
class UserAuth:
    """Small JSON-file-backed user store with password authentication.

    New records store a salted PBKDF2-HMAC-SHA256 digest instead of the
    password itself. Records that still carry a legacy plaintext
    ``"password"`` field keep working, so an existing user file stays valid.

    Attributes are ``user_file`` (path of the JSON file) and ``users``
    (dict mapping username to its record).
    """

    # Work factor for newly created password digests; each record stores the
    # value it was created with, so this can be raised later.
    PBKDF2_ITERATIONS = 200_000

    def __init__(self, user_file="users.json"):
        """Load the users from ``user_file``, creating defaults if missing."""
        self.user_file = user_file
        self.users = self._load_users()

    @classmethod
    def _make_record(cls, password, role, name):
        """Build a user record holding a salted digest of ``password``."""
        salt = secrets.token_bytes(16)
        digest = hashlib.pbkdf2_hmac(
            "sha256", password.encode("utf-8"), salt, cls.PBKDF2_ITERATIONS
        )
        return {
            "password_hash": digest.hex(),
            "salt": salt.hex(),
            "iterations": cls.PBKDF2_ITERATIONS,
            "role": role,
            "name": name,
        }

    @staticmethod
    def _verify_password(record, password):
        """Return True when ``password`` matches the credentials in ``record``."""
        if not isinstance(record, dict) or not isinstance(password, str):
            return False
        candidate = password.encode("utf-8")
        if "password_hash" in record:
            try:
                salt = bytes.fromhex(record["salt"])
                expected = bytes.fromhex(record["password_hash"])
                actual = hashlib.pbkdf2_hmac(
                    "sha256", candidate, salt, int(record["iterations"])
                )
            except (KeyError, TypeError, ValueError, OverflowError):
                # A malformed record never authenticates
                return False
            return hmac.compare_digest(actual, expected)
        legacy = record.get("password")
        if isinstance(legacy, str):
            # Legacy plaintext record; compare bytes in constant time
            return hmac.compare_digest(legacy.encode("utf-8"), candidate)
        return False

    def _load_users(self):
        """Read the user file, or create and persist the default accounts.

        Returns an empty dict when the file exists but cannot be used.
        """
        if not os.path.exists(self.user_file):
            # Default accounts. NOTE: these well-known passwords are only
            # suitable for a demo and should be changed before real use.
            default_users = {
                "admin": self._make_record("admin123", "admin", "管理员"),
                "user": self._make_record("user123", "user", "普通用户"),
            }
            self._save_users(default_users)
            return default_users
        try:
            with open(self.user_file, 'r', encoding='utf-8') as file:
                users = json.load(file)
        except Exception as e:
            logger.error(f"加载用户信息失败: {str(e)}")
            return {}
        if not isinstance(users, dict):
            logger.error(f"加载用户信息失败: {self.user_file} 的顶层结构不是对象")
            return {}
        return users

    def _save_users(self, users):
        """Write ``users`` to the user file; failures are logged, not raised."""
        try:
            with open(self.user_file, 'w', encoding='utf-8') as file:
                json.dump(users, file, indent=2)
            logger.info("用户信息已保存")
        except Exception as e:
            logger.error(f"保存用户信息失败: {str(e)}")

    def authenticate(self, username, password):
        """Check a username/password pair.

        Returns:
            ``(True, record)`` on success, ``(False, None)`` otherwise.
        """
        record = self.users.get(username)
        if record is not None and self._verify_password(record, password):
            logger.info(f"用户 {username} 认证成功")
            return True, record
        logger.warning(f"用户 {username} 认证失败")
        return False, None

    def add_user(self, username, password, role, name):
        """Create a new user and persist the user file.

        Returns:
            ``(success, message)``; fails when the username already exists.
        """
        if username in self.users:
            logger.warning(f"用户 {username} 已存在")
            return False, "用户已存在"
        self.users[username] = self._make_record(password, role, name)
        self._save_users(self.users)
        logger.info(f"新用户 {username} ({role}) 已添加")
        return True, "用户已添加"

    def delete_user(self, username):
        """Remove a user and persist the user file.

        Returns:
            ``(success, message)``; the ``admin`` account cannot be removed.
        """
        if username not in self.users:
            logger.warning(f"用户 {username} 不存在")
            return False, "用户不存在"
        if username == "admin":
            logger.warning("尝试删除管理员账户被拒绝")
            return False, "不能删除管理员账户"
        del self.users[username]
        self._save_users(self.users)
        logger.info(f"用户 {username} 已删除")
        return True, "用户已删除"

    def get_all_users(self):
        """Return every user as a dict of username, role and name (no credentials)."""
        return [
            {"username": username, "role": data["role"], "name": data["name"]}
            for username, data in self.users.items()
        ]
# Data handling
class DataManager:
    """Load, save, list and delete data files inside one directory.

    CSV and Excel files are handled as pandas DataFrames, JSON files as
    plain Python objects. Failures are logged and reported through the
    return value instead of being raised.
    """

    def __init__(self, data_dir="data"):
        """Remember ``data_dir`` and create it when it does not exist."""
        self.data_dir = data_dir
        self._ensure_data_dir()

    def _ensure_data_dir(self):
        """Create the data directory if it is missing."""
        if not os.path.exists(self.data_dir):
            # exist_ok avoids a crash if the directory appears concurrently
            os.makedirs(self.data_dir, exist_ok=True)
            logger.info(f"创建数据目录: {self.data_dir}")

    def load_data(self, file_name):
        """Load ``file_name`` from the data directory.

        Returns:
            A DataFrame for ``.csv``/``.xlsx``, the decoded object for
            ``.json``, or ``None`` when the file is missing, has an
            unsupported extension, or cannot be read.
        """
        file_path = os.path.join(self.data_dir, file_name)
        if not os.path.exists(file_path):
            logger.warning(f"数据文件 {file_name} 不存在")
            return None
        try:
            if file_name.endswith('.csv'):
                data = pd.read_csv(file_path)
            elif file_name.endswith('.xlsx'):
                data = pd.read_excel(file_path)
            elif file_name.endswith('.json'):
                with open(file_path, 'r', encoding='utf-8') as file:
                    data = json.load(file)
            else:
                logger.error(f"不支持的文件格式: {file_name}")
                return None
            logger.info(f"成功加载数据文件: {file_name}")
            return data
        except Exception as e:
            logger.error(f"加载 {file_name} 时出错: {str(e)}")
            return None

    def save_data(self, data, file_name):
        """Write ``data`` to ``file_name`` inside the data directory.

        DataFrames may be written as ``.csv`` or ``.xlsx``; any other object
        only as ``.json``.

        Returns:
            True on success, False on an unsupported combination or error.
        """
        file_path = os.path.join(self.data_dir, file_name)
        try:
            if isinstance(data, pd.DataFrame):
                if file_name.endswith('.csv'):
                    data.to_csv(file_path, index=False)
                elif file_name.endswith('.xlsx'):
                    data.to_excel(file_path, index=False)
                else:
                    logger.error(f"不支持的DataFrame输出格式: {file_name}")
                    return False
            elif file_name.endswith('.json'):
                with open(file_path, 'w', encoding='utf-8') as file:
                    json.dump(data, file, indent=2)
            else:
                logger.error("不支持的数据类型或文件格式")
                return False
            logger.info(f"成功保存数据到: {file_name}")
            return True
        except Exception as e:
            logger.error(f"保存 {file_name} 时出错: {str(e)}")
            return False

    def list_data_files(self):
        """Return the names of the regular files in the data directory."""
        try:
            files = os.listdir(self.data_dir)
            return [f for f in files if os.path.isfile(os.path.join(self.data_dir, f))]
        except Exception as e:
            logger.error(f"列出数据文件时出错: {str(e)}")
            return []

    def delete_data_file(self, file_name):
        """Delete ``file_name`` from the data directory; return success."""
        file_path = os.path.join(self.data_dir, file_name)
        if not os.path.exists(file_path):
            logger.warning(f"数据文件 {file_name} 不存在")
            return False
        try:
            os.remove(file_path)
            logger.info(f"成功删除数据文件: {file_name}")
            return True
        except Exception as e:
            logger.error(f"删除 {file_name} 时出错: {str(e)}")
            return False

    @staticmethod
    def _build_sales_frame(dates):
        """Build random daily sales/expenses/profit rows, one per date."""
        day_count = len(dates)
        day_index = np.arange(day_count)
        seasonal = np.sin(np.linspace(0, 4 * np.pi, day_count)) * 200
        frame = pd.DataFrame({
            'date': dates,
            'sales': np.random.normal(1000, 200, size=day_count) + seasonal,
            'expenses': np.random.normal(700, 100, size=day_count),
        })
        # Add a linear upward trend on top of the noise
        frame['sales'] = frame['sales'] + day_index * 0.5
        frame['expenses'] = frame['expenses'] + day_index * 0.3
        categories = ['电子产品', '家居用品', '服装', '食品', '其他']
        frame['category'] = np.random.choice(categories, size=day_count)
        # Profit is derived before rounding, then all amounts are rounded
        frame['profit'] = frame['sales'] - frame['expenses']
        for column in ('sales', 'expenses', 'profit'):
            frame[column] = frame[column].round(2)
        return frame

    @staticmethod
    def _build_user_frame(dates):
        """Build random daily user-activity rows, one per date."""
        day_count = len(dates)
        frame = pd.DataFrame({
            'date': dates,
            'active_users': np.random.normal(500, 100, size=day_count),
            'new_users': np.random.normal(50, 20, size=day_count),
            'page_views': np.random.normal(2000, 500, size=day_count),
        })
        # Add a linear upward trend to the active users
        frame['active_users'] = frame['active_users'] + np.arange(day_count) * 0.2
        for column in ('active_users', 'new_users', 'page_views'):
            frame[column] = frame[column].astype(int)
        return frame

    def generate_demo_data(self):
        """Create ``demo_sales.csv`` and ``demo_users.csv`` for the last year.

        Returns:
            True only when both files were generated and saved.
        """
        try:
            end_date = datetime.now()
            start_date = end_date - timedelta(days=365)
            dates = pd.date_range(start=start_date, end=end_date, freq='D')
            sales_saved = self.save_data(self._build_sales_frame(dates), 'demo_sales.csv')
            users_saved = self.save_data(self._build_user_frame(dates), 'demo_users.csv')
        except Exception as e:
            logger.error(f"生成演示数据时出错: {str(e)}")
            return False
        if not (sales_saved and users_saved):
            # save_data reports failures through its return value
            logger.error("生成演示数据时出错: 演示数据文件保存失败")
            return False
        logger.info("已生成演示数据文件")
        return True
# Create the shared service instances
user_auth = UserAuth()
data_manager = DataManager()
# Fall back to an empty mapping so the .get() lookups below never hit None
config = load_config() or {}
# Initial application state
is_authenticated = False
current_user = None
login_error = ""
current_page = "login"
# Login form fields: the login page binds {username} and {password}, and the
# login/logout callbacks read and reset them, so they are defined up front
username = ""
password = ""
app_title = config.get('title', 'Taipy企业应用平台')
app_theme = config.get('theme', 'light')
app_logo = config.get('logo', '')
# Initial data: generate the demo files when the data directory is empty
available_data_files = data_manager.list_data_files()
if not available_data_files:
    data_manager.generate_demo_data()
    available_data_files = data_manager.list_data_files()
selected_data_file = available_data_files[0] if available_data_files else None
data = None
if selected_data_file:
    # load_data() joins the name with the data directory itself, so pass the
    # bare file name rather than a path that already includes the directory
    data = data_manager.load_data(selected_data_file)
# Chart placeholders, filled in once a data file is loaded
sales_chart = None
category_chart = None
user_chart = None
# User management form state
users_list = user_auth.get_all_users()
new_username = ""
new_password = ""
new_role = "user"
new_name = ""
user_message = ""
# Callback functions
def login(state):
    """Check the submitted credentials and open the dashboard on success.

    On failure only ``login_error`` is set. On success the session fields
    are updated, the data file list is refreshed and its first entry loaded.
    """
    authenticated, account = user_auth.authenticate(state.username, state.password)
    if not authenticated:
        state.login_error = "用户名或密码错误"
        logger.warning(f"用户 {state.username} 登录失败")
        return
    state.is_authenticated = True
    state.current_user = account
    state.login_error = ""
    state.current_page = "dashboard"
    state.available_data_files = data_manager.list_data_files()
    # Load the first available file so the dashboard has data to show
    if state.available_data_files:
        state.selected_data_file = state.available_data_files[0]
        load_selected_data(state)
    logger.info(f"用户 {state.username} 登录成功")
def logout(state):
    """Log the current user out and return to the login page."""
    # Log first: the username is cleared below
    logger.info(f"用户 {state.username} 登出")
    session_reset = (
        ("is_authenticated", False),
        ("current_user", None),
        ("username", ""),
        ("password", ""),
        ("current_page", "login"),
    )
    for attribute, value in session_reset:
        setattr(state, attribute, value)
def load_selected_data(state):
    """Load the selected data file and rebuild the dashboard charts.

    Does nothing when no file is selected. All three charts are reset on
    every load, so charts built from a previously selected file are not left
    on screen. A chart is only rebuilt when the loaded DataFrame has the
    columns it needs; chart errors are logged and leave that chart empty.
    """
    if not state.selected_data_file:
        return
    loaded = data_manager.load_data(state.selected_data_file)
    sales_fig = category_fig = user_fig = None
    if isinstance(loaded, pd.DataFrame):
        try:
            columns = set(loaded.columns)
            # Sales data: needs at least 'date' and 'sales'
            if {'date', 'sales'} <= columns:
                loaded['date'] = pd.to_datetime(loaded['date'])
                # Plot only the series that are actually present
                series = [c for c in ('sales', 'expenses', 'profit') if c in columns]
                sales_fig = px.line(
                    loaded, x='date', y=series,
                    title='销售、支出和利润趋势',
                    labels={'value': '金额', 'date': '日期', 'variable': '指标'},
                    template='plotly_white'
                )
                if 'category' in columns:
                    category_data = loaded.groupby('category')['sales'].sum().reset_index()
                    category_fig = px.pie(
                        category_data, values='sales', names='category',
                        title='按类别的销售额',
                        template='plotly_white'
                    )
            # User activity data: needs at least 'date' and 'active_users'
            if {'date', 'active_users'} <= columns:
                loaded['date'] = pd.to_datetime(loaded['date'])
                series = [c for c in ('active_users', 'new_users') if c in columns]
                user_fig = px.line(
                    loaded, x='date', y=series,
                    title='用户活动趋势',
                    labels={'value': '用户数', 'date': '日期', 'variable': '指标'},
                    template='plotly_white'
                )
        except Exception as e:
            logger.error(f"创建图表时出错: {str(e)}")
    # Assign after processing so state receives the final objects in one step
    state.data = loaded
    state.sales_chart = sales_fig
    state.category_chart = category_fig
    state.user_chart = user_fig
    if loaded is not None:
        # load_data() has already logged the reason when loading failed
        logger.info(f"已加载数据文件: {state.selected_data_file}")
def add_new_user(state):
    """Create a user from the form fields; only administrators may do so.

    The outcome is reported through ``state.user_message``. On success the
    text inputs are cleared and the user list is refreshed.
    """
    required = (state.new_username, state.new_password, state.new_role, state.new_name)
    if not all(required):
        state.user_message = "所有字段都必须填写"
        return
    # current_user is None until someone has logged in
    actor = state.current_user
    if not actor or actor["role"] != "admin":
        state.user_message = "只有管理员可以添加用户"
        return
    success, message = user_auth.add_user(
        state.new_username, state.new_password, state.new_role, state.new_name
    )
    state.user_message = message
    if success:
        # Clear the text inputs (the role selector keeps its value)
        state.new_username = ""
        state.new_password = ""
        state.new_name = ""
        state.users_list = user_auth.get_all_users()
def delete_user_account(state, username):
    """Delete ``username``; only administrators may do so.

    The outcome is reported through ``state.user_message`` and the user list
    is refreshed after a successful deletion.
    """
    # current_user is None until someone has logged in
    actor = state.current_user
    if not actor or actor["role"] != "admin":
        state.user_message = "只有管理员可以删除用户"
        return
    success, message = user_auth.delete_user(username)
    state.user_message = message
    if success:
        state.users_list = user_auth.get_all_users()
def generate_demo_data_action(state):
    """Regenerate the demo files and reload the first available file."""
    if not data_manager.generate_demo_data():
        state.user_message = "生成演示数据失败"
        return
    state.user_message = "演示数据已生成"
    # Refresh the file list, then show the first file
    state.available_data_files = data_manager.list_data_files()
    if state.available_data_files:
        state.selected_data_file = state.available_data_files[0]
        load_selected_data(state)
def change_theme(state):
    """Toggle ``state.app_theme``: "light" becomes "dark", anything else "light"."""
    next_theme = "light"
    if state.app_theme == "light":
        next_theme = "dark"
    state.app_theme = next_theme
def navigate_to(state, page):
    """Make ``page`` the currently displayed page."""
    setattr(state, "current_page", page)
# Page layout definitions
# Login page template: a centered container holding the application title,
# the login_error text and, while is_authenticated is false, a form with
# username and password inputs and a button whose on_action is `login`.
login_page = """
<|container|class_name=login-container|
<center>
# {app_title}
<|{login_error}|text|class_name=text-danger|>
<|{not is_authenticated}|
<|container|class_name=login-form|
**用户名**
<|{username}|input|>
**密码**
<|{password}|input|password=True|>
<|登录|button|on_action=login|class_name=btn-primary|>
|>
|>
</center>
|>
"""
# Main application template: a navigation bar followed by four sections
# (dashboard, data, users, settings), each rendered only when current_page
# matches its name. The help text at the end sits in a fenced code block,
# which is closed before the enclosing parts end.
dashboard_page = """
<|navbar|
brand={app_title}
on_action=navigate_to|
[监控仪表板](dashboard)
[数据管理](data)
[用户管理](users)
[系统设置](settings)
<|{current_user["name"]}|text|class_name=ms-auto|>
<|登出|button|on_action=logout|class_name=btn-sm ms-2|>
|>
<|container|
<|part|render={current_page == "dashboard"}|
# 监控仪表板
<|{data is not None}|
<|layout|columns=1 1 1|
<|
### 总销售额
<|{data["sales"].sum() if "sales" in data else 0}|text|raw|format=,.2f|class_name=display-4 text-primary|>
|>
<|
### 总支出
<|{data["expenses"].sum() if "expenses" in data else 0}|text|raw|format=,.2f|class_name=display-4 text-warning|>
|>
<|
### 总利润
<|{data["profit"].sum() if "profit" in data else 0}|text|raw|format=,.2f|class_name=display-4 text-success|>
|>
|>
<|layout|columns=2 1|
<|
### 销售趋势
<|{sales_chart}|chart|height=350px|>
|>
<|
### 按类别销售额
<|{category_chart}|chart|height=350px|>
|>
|>
<|{user_chart is not None}|
### 用户活动
<|{user_chart}|chart|height=350px|>
|>
|>
<|{data is None}|
**没有可用数据。请在"数据管理"中选择数据文件。**
|>
|>
<|part|render={current_page == "data"}|
# 数据管理
<|layout|columns=3 1|
<|
### 选择数据文件
<|{selected_data_file}|selector|lov={available_data_files}|on_change=load_selected_data|>
|>
<|
<|生成演示数据|button|on_action=generate_demo_data_action|>
|>
|>
<|{data is not None}|
### 数据预览
<|{data.head(10) if isinstance(data, pd.DataFrame) else data}|table|width=100%|>
|>
<|{data is not None and isinstance(data, pd.DataFrame)}|
### 数据统计
<|{data.describe() if "describe" in dir(data) else "无统计信息"}|table|width=100%|>
|>
<|{data is None}|
**没有选择数据文件或文件格式不支持。**
|>
|>
<|part|render={current_page == "users"}|
# 用户管理
<|{current_user and current_user["role"] == "admin"}|
<|layout|columns=1 1 1 1|
<|
#### 用户名
<|{new_username}|input|>
|>
<|
#### 密码
<|{new_password}|input|password=True|>
|>
<|
#### 角色
<|{new_role}|selector|lov=admin;user|>
|>
<|
#### 姓名
<|{new_name}|input|>
|>
|>
<|添加用户|button|on_action=add_new_user|>
<|{user_message}|text|class_name=mt-3|>
|>
### 用户列表
<|{len(users_list) > 0}|
<|layout|columns=1 1 1 2|class_name=fw-bold|
<|用户名|>
<|角色|>
<|姓名|>
<|操作|>
|>
<|{users_list}|expandable|expanded=True|
<|layout|columns=1 1 1 2|
<|{item["username"]}|>
<|{item["role"]}|>
<|{item["name"]}|>
<|
<|{current_user["role"] == "admin" and item["username"] != "admin"}|
<|删除|button|on_action=delete_user_account|item={item["username"]}|class_name=btn-sm btn-danger|>
|>
|>
|>
|>
|>
<|{current_user and current_user["role"] != "admin"}|
<div class="alert alert-warning">
只有管理员用户可以管理用户。
</div>
|>
|>
<|part|render={current_page == "settings"}|
# 系统设置
<|layout|columns=1 1|
<|
### 应用主题
当前主题: <|{app_theme}|text|>
<|切换主题|button|on_action=change_theme|>
|>
<|
### 关于应用
**应用名称**: <|{app_title}|text|>
**版本**: 1.0.0
**构建于**: 2023-05-15
|>
|>
### 系统信息
<|layout|columns=1 1 1|
<|
#### Python版本
<|{platform.python_version()}|text|>
|>
<|
#### 操作系统
<|{platform.system()} {platform.release()}|text|>
|>
<|
#### Taipy版本
<|{tp.__version__}|text|>
|>
|>
### 帮助文档
```markdown
## 使用指南
1. **仪表板**: 查看关键指标和图表
2. **数据管理**: 加载和预览数据文件
3. **用户管理**: 添加和管理用户账户
4. **系统设置**: 配置应用设置
如需更多帮助,请联系系统管理员。
```
|>
|>
"""
# 导入系统信息模块
import platform
# Map each page name to its template
pages = dict(login=login_page, dashboard=dashboard_page)
# Build and launch the application when executed as a script
if __name__ == "__main__":
    import argparse

    # Command-line options: listening port and debug switch
    cli = argparse.ArgumentParser(description='Taipy企业应用启动器')
    cli.add_argument('--port', type=int, default=5000, help='服务端口号')
    cli.add_argument('--debug', action='store_true', help='启用调试模式')
    options = cli.parse_args()
    port, debug = options.port, options.debug

    # Create the GUI first, then announce the start and serve
    app = Gui(pages=pages)
    logger.info(f"应用启动于端口 {port},调试模式: {debug}")
    app.run(debug=debug, port=port)
11. 总结
Taipy是Python世界中一个强大的数据应用开发框架,它通过简化界面开发流程,让数据科学家和分析师能够快速将数据分析脚本转变为精美的交互式Web应用。
Taipy的主要优势:
- 低代码开发 - 使用类似Markdown的语法定义UI,无需HTML/CSS/JavaScript知识
- 无缝Python集成 - 直接连接到现有数据处理和分析代码
- 响应式状态管理 - 自动处理前端与后端的数据同步
- 丰富的组件库 - 提供各类UI控件,从简单文本到复杂图表
- 企业级特性 - 支持多页应用、导航、认证和授权
- 扩展性与定制化 - 易于扩展和定制以满足特定需求
- 全栈数据平台 - 除了GUI外,还提供数据工作流、任务调度和数据管道功能
适用场景:
- 数据分析原型 - 快速构建POC和MVP展示分析结果
- 交互式仪表板 - 创建动态、可交互的数据可视化界面
- 内部工具开发 - 构建用于数据处理和分析的专业工具
- 业务应用 - 开发完整的数据驱动业务应用
- 机器学习界面 - 为模型训练、评估和推理构建交互界面
通过Taipy,数据专业人员可以专注于数据和业务逻辑,而不需要深入学习复杂的Web前端技术。这大大缩短了从分析到应用的转化时间,提高了迭代速度,最终为企业和用户带来更大的数据价值。
无论你是数据科学家想要分享分析结果,还是开发者需要快速构建数据应用,Taipy都提供了一种简洁而强大的方式,让你在短短几分钟内就能将Python代码转变为美观、实用的Web应用。