import os
import pickle
import pandas as pd
import dash
from dash import dcc, html, dash_table, callback_context, ctx
from dash.dependencies import Input, Output, State, ClientsideFunction
import dash_bootstrap_components as dbc
from datetime import datetime, timedelta
import base64
import io, time
import hashlib
from dash.exceptions import PreventUpdate
def get_time_str():
return datetime.now().strftime('%Y-%m-%d %H:%M:%S')
def time_print(*s, sep=' '):
    """Print the given values prefixed with the current timestamp."""
    print(get_time_str(), *s, sep=sep)
    # time.sleep(0.5)  # Optional: slow execution down so the steps are visible.
# Initialise the Dash app (Bootstrap theme for the UI)
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.LUMEN])
server = app.server  # needed when deploying
# Module-level caches
DATA_CACHE = {}
CLASSIFICATION_CACHE = {}
RESULT_CACHE = {}
RESULT_CACHE_TIME = None
CACHE_TTL = timedelta(minutes=10)
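# RESULT_CACHE holds the last analysis result and is rebuilt either when the
# "刷新结果" button is pressed or when the cached copy is older than CACHE_TTL.
# DATA_CACHE is currently not referenced anywhere and is kept as a placeholder.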
# Dropdown options for the 条目分类 column
CATEGORY_OPTIONS = [
{"label": "安装", "value": "安装"},
{"label": "勘测", "value": "勘测"},
{"label": "可选", "value": "可选"},
{"label": "网优", "value": "网优"},
{"label": "运输-正向", "value": "运输-正向"},
{"label": "运输-逆向", "value": "运输-逆向"}
]
# Style definitions
TABLE_STYLE = {
'maxHeight': '600px',
'overflowY': 'auto',
'overflowX': 'auto',
'whiteSpace': 'nowrap'
}
CELL_TOOLTIP_STYLE = {
'textAlign': 'left',
'overflow': 'hidden',
'textOverflow': 'ellipsis',
'maxWidth': 150,
'minWidth': 100
}
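# Cells styled with CELL_TOOLTIP_STYLE are truncated with an ellipsis; the full
# value is surfaced through the tooltip_data entries built for each table below.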
# -------------------------------
# Helper functions
# -------------------------------
def save_to_pickle(df_dict, pickle_path):
"""将整个 Excel 的多个 sheet 保存为 .pickle 文件"""
with open(pickle_path, 'wb') as f:
pickle.dump(df_dict, f)
def load_from_pickle(pickle_path):
"""从 .pickle 文件加载数据"""
with open(pickle_path, 'rb') as f:
return pickle.load(f)
def read_excel_with_sheets(file_path):
"""读取 Excel 所有 sheet 并返回字典 {sheet_name: DataFrame}"""
try:
xl = pd.ExcelFile(file_path)
sheets = {sheet: xl.parse(sheet) for sheet in xl.sheet_names}
return sheets, None
except Exception as e:
return None, str(e)
def get_file_size_mb(file_path):
"""获取文件大小(MB)"""
size_bytes = os.path.getsize(file_path)
return round(size_bytes / (1024 * 1024), 2)
def validate_columns(df, required_cols):
"""检查 DataFrame 是否包含所需列"""
missing = [col for col in required_cols if col not in df.columns]
return len(missing) == 0, missing
def site_model(data_df, classification_df):
"""
数据分析核心函数:关联两个数据集并生成结果
假设逻辑:以条目描述为键,匹配分类信息,并补充项目编码和分类
"""
if data_df is None or classification_df is None:
return pd.DataFrame({"错误": ["缺少原始或分类数据"]})
    # Merge on the key column only
merge_on = "条目描述"
if merge_on not in data_df.columns or merge_on not in classification_df.columns:
return pd.DataFrame({"错误": [f"无法找到 '{merge_on}' 字段用于关联"]})
result = data_df.copy()
classification_clean = classification_df[[merge_on, "条目分类", "项目编码"]].drop_duplicates(subset=[merge_on])
result = result.merge(classification_clean, on=merge_on, how="left", suffixes=('', '_分类'))
    # Fill missing values with defaults
    result["条目分类"] = result["条目分类"].fillna("未分类")
    result["项目编码"] = result["项目编码"].fillna("ALL")
return result
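# Illustrative example (values are made up): if data_df contains 条目描述 = "天线安装"
# and classification_df maps that description to 条目分类 = "安装", 项目编码 = "PROJ001",
# the merged row keeps all original columns plus those two fields; descriptions with
# no match come back as 条目分类 = "未分类", 项目编码 = "ALL".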
# -------------------------------
# Layout
# -------------------------------
app.layout = html.Div([
    # Page title
html.H1("站点模型分类工具", style={'textAlign': 'center', 'margin': '30px'}),
html.Div([
html.H4("选择Excel文件"),
        # File upload component (drag a file in, or click to browse)
dcc.Upload(
id='upload-data',
children=html.Div([
'拖拽文件到这里,或 ',
html.A('点击选择文件')
]),
style={
'width': '100%', 'height': '60px', 'lineHeight': '60px',
'borderWidth': '1px', 'borderStyle': 'dashed',
'borderRadius': '5px', 'textAlign': 'center', 'margin': '10px'
},
multiple=False
),
html.Div(id='file-info', style={'margin': '10px'}),
html.Div(id='cache-status', style={'margin': '10px'}),
], style={'padding': '20px'}),
    # Tabs
dcc.Tabs(id="main-tabs", value='tab-import', children=[
        # Tab 1: import and preview data
dcc.Tab(label='导入数据及预览', value='tab-import', children=[
html.H5("选择Sheet"),
dcc.Dropdown(id='sheet-dropdown'),
html.Div(id='validation-alert', style={'margin': '10px'}),
html.Div(id='data-preview-table', style=TABLE_STYLE),
]),
        # Tab 2: item classification
dcc.Tab(id='tab-classify', label='条目分类', value='tab-classify', disabled=True, children=[
html.Div([
html.H4("条目分类管理"),
html.Div([
html.Button("批量修改分类", id="btn-batch-category", n_clicks=0, style={'margin': '5px'}),
html.Button("批量修改项目编码", id="btn-batch-code", n_clicks=0, style={'margin': '5px'})
], style={'margin': '10px'}),
                # Batch-edit modals
dbc.Modal([
dbc.ModalHeader("批量修改 - 条目分类"),
dbc.ModalBody([
html.Label("选择新分类:"),
dcc.Dropdown(id="modal-category-dropdown", options=CATEGORY_OPTIONS),
]),
dbc.ModalFooter([
html.Button("取消", id="close-category-modal", n_clicks=0),
html.Button("确认修改", id="confirm-category-modal", n_clicks=0)
])
], id="category-modal", is_open=False),
dbc.Modal([
dbc.ModalHeader("批量修改 - 项目编码"),
dbc.ModalBody([
html.Label("输入新的项目编码(留空表示 ALL):"),
dcc.Input(id="modal-code-input", type="text", placeholder="例如:PROJ001", style={'width': '100%'})
]),
dbc.ModalFooter([
html.Button("取消", id="close-code-modal", n_clicks=0),
html.Button("确认修改", id="confirm-code-modal", n_clicks=0)
])
], id="code-modal", is_open=False),
dash_table.DataTable(
id='classification-table',
page_size=15,
# row_deletable=True,
style_table=TABLE_STYLE,
style_cell=CELL_TOOLTIP_STYLE,
tooltip_data=[],
tooltip_duration=None,
                    editable=True,              # allow in-cell edits
                    row_selectable="multi",     # checkboxes for multi-row selection
                    sort_action="native",       # click column headers to sort
                    filter_action="native",     # built-in per-column filter boxes
                    fixed_rows={'headers': True},  # keep the header row visible while scrolling
)
], style={'padding': '20px'}),
html.Div(id='selection-count', style={'margin': '10px', 'color': '#666'}),
]),
        # Tab 3: data analysis
dcc.Tab(id='tab-analyze', label='数据分析', value='tab-analyze', disabled=True, children=[
html.Div([
html.H4("分析结果"),
html.Button("刷新结果", id="btn-refresh-result", n_clicks=0, style={'margin': '10px'}),
html.Div(id='result-timestamp', style={'margin': '10px', 'color': '#555'}),
dash_table.DataTable(
id='analysis-result-table',
page_size=20,
style_table=TABLE_STYLE,
style_cell=CELL_TOOLTIP_STYLE,
tooltip_data=[],
tooltip_duration=None
),
                # Export button
html.Div([
html.Button("导出全部数据", id="btn-export", n_clicks=0, style={'margin': '20px'}),
dcc.Download(id="download-data")
])
], style={'padding': '20px'})
])
]),
    # Stores for intermediate state
    dcc.Store(id='stored-data-path'),
    dcc.Store(id='stored-sheet-name'),
    dcc.Store(id='stored-classification-edited'),
    dcc.Store(id='tab-states', data={'can_proceed': False}),
    dcc.Store(id='stored-sheet', data=None),        # remembers the currently selected sheet
    dcc.Store(id='stored-upload-hash', data=None),  # hash of the last processed upload
])
# -------------------------------
# Callbacks
# -------------------------------
@app.callback(
[Output('file-info', 'children'),
Output('cache-status', 'children'),
Output('stored-data-path', 'data'),
Output('sheet-dropdown', 'options'),
Output('sheet-dropdown', 'value'),
Output('tab-states', 'data'),
# Output('stored-sheet', 'data'),
     Output('stored-upload-hash', 'data')],    # new output: remember the hash of the processed upload
    Input('upload-data', 'contents'),
    State('upload-data', 'filename'),
    State('upload-data', 'last_modified'),
    State('stored-upload-hash', 'data'),       # hash of the previously processed upload
    prevent_initial_call=True
)
def handle_file_upload(contents, filename, date, last_processed_hash):
    time_print("handle_file_upload...")
    if not contents:
        time_print("empty upload, skipping...")
        raise PreventUpdate
    # Split the data-URI header from the Base64 payload
    content_type, content_string = contents.split(',', 1)
    # Hash the payload so the same file is not processed twice in a row
    current_hash = hashlib.md5(content_string.encode()).hexdigest()
    if current_hash == last_processed_hash:
        time_print("unchanged upload, skipping...")
        raise PreventUpdate
time_print("load file...")
try:
# 解码内容
content_type, content_string = contents.split(',')
decoded = base64.b64decode(content_string)
temp_path = f"./temp_{filename}"
with open(temp_path, 'wb') as f:
f.write(decoded)
file_size = get_file_size_mb(temp_path)
info_msg = f"文件名: {filename} | 大小: {file_size} MB"
# 构造 pickle 路径
pickle_path = temp_path + ".pickle"
# 获取文件所在目录和基础名
# file_dir = os.path.dirname(os.path.abspath(temp_path))
# file_base = os.path.basename(temp_path)
# 构造 .pickle 完整路径
# pickle_filename = file_base + ".pickle"
# pickle_path = os.path.join(file_dir, pickle_filename)
# 检查是否已有 pickle 缓存
if os.path.exists(pickle_path):
sheets_dict = load_from_pickle(pickle_path)
cache_msg = f"✅ 已从缓存加载 {pickle_path}.pickle 文件"
else:
sheets_dict, error = read_excel_with_sheets(temp_path)
if error:
return f"❌ 文件读取失败: {error}", "", None, [], None, {'can_proceed': False}
save_to_pickle(sheets_dict, pickle_path)
cache_msg = f"🆕 已读取 Excel 并缓存为 {pickle_path}.pickle"
# 获取所有 sheet 名称
sheet_names = list(sheets_dict.keys())
dropdown_options = [{'label': name, 'value': name} for name in sheet_names]
return info_msg, cache_msg, temp_path, dropdown_options, sheet_names[0], {'can_proceed': True}, current_hash
except Exception as e:
return f"❌ 处理失败: {str(e)}", "", None, [], None, {'can_proceed': False}, current_hash
@app.callback(
    [Output('data-preview-table', 'children'),
     Output('validation-alert', 'children'),
     Output('tab-classify', 'disabled'),
     Output('tab-analyze', 'disabled')],
    Input('sheet-dropdown', 'value'),
    State('stored-data-path', 'data'),
    prevent_initial_call=True
)
def update_preview(selected_sheet, file_path):
time_print("update preview...")
if not file_path or not selected_sheet:
alert = dbc.Alert("请先上传文件。", color="info")
table = html.Div()
return table, alert, True, True
try:
pickle_path = file_path + ".pickle"
sheets_dict = load_from_pickle(pickle_path)
df = sheets_dict[selected_sheet]
        # Check the required columns
        required_cols = ["条目描述", "项目编码", "项目名称"]
        valid, missing = validate_columns(df, required_cols)
        if valid:
            alert = dbc.Alert("✅ PO明细读取成功,请检查条目分类", color="success")
            tabs_disabled = False
            # Build the preview table (with hover tooltips for truncated cells)
            columns = [{"name": col, "id": col, "editable": False} for col in df.columns]
            data = df.to_dict('records')
            table = dash_table.DataTable(
                data=data,
                columns=columns,
                page_size=20,
                style_table=TABLE_STYLE,
                style_cell=CELL_TOOLTIP_STYLE,
                tooltip_data=[
                    {col: {'value': str(row[col]), 'type': 'markdown'} for col in df.columns}
                    for row in data
                ],
                tooltip_duration=None
            )
        else:
            alert = dbc.Alert(f"❌ 缺少字段: {', '.join(missing)},请尝试切换Sheet", color="danger")
            table = None
            tabs_disabled = True
        return table, alert, tabs_disabled, tabs_disabled
    except Exception as e:
        alert = dbc.Alert(f"❌ 预览失败: {str(e)}", color="danger")
        return html.Div(), alert, True, True
@app.callback(
[Output('classification-table', 'data'),
Output('classification-table', 'columns'),
Output('classification-table', 'tooltip_data'),
     Output('classification-table', 'dropdown')],  # dropdown options for editable columns
Input('stored-data-path', 'data'),
Input('sheet-dropdown', 'value'),
Input('main-tabs', 'value'),
State('classification-table', 'data'),
prevent_initial_call=True
)
def load_classification_data(file_path, selected_sheet, current_tab, existing_data):
if current_tab != 'tab-classify' or not file_path:
raise dash.exceptions.PreventUpdate
try:
        # Try to load an existing classification file
class_file = os.path.join(os.path.dirname(file_path), "classification.xlsx")
df_class = None
if os.path.exists(class_file):
df_class = pd.read_excel(class_file)
required = ["条目描述", "条目分类", "项目编码"]
valid, _ = validate_columns(df_class, required)
if not valid:
df_class = None
        # No valid classification file: build a default table from the main data
if df_class is None:
pickle_path = file_path + ".pickle"
sheets_dict = load_from_pickle(pickle_path)
main_df = sheets_dict[selected_sheet]
desc_col = "条目描述"
if desc_col in main_df.columns:
unique_desc = main_df[desc_col].drop_duplicates().reset_index(drop=True)
df_class = pd.DataFrame({
"条目描述": unique_desc,
"条目分类": "",
"项目编码": ""
})
else:
df_class = pd.DataFrame(columns=["条目描述", "条目分类", "项目编码"])
        # Column definitions
columns = [
{"name": "条目描述", "id": "条目描述", "editable": False},
{
"name": "条目分类",
"id": "条目分类",
"presentation": "dropdown",
"editable": True
},
{
"name": "项目编码",
"id": "项目编码",
"editable": True
}
]
        # Dropdown options for the 条目分类 column
dropdown = {
'条目分类': {
'options': CATEGORY_OPTIONS
}
}
        # Table data and per-cell tooltips
data = df_class.to_dict('records')
tooltip_data = [
{col: {'value': str(val), 'type': 'markdown'} for col, val in row.items()}
for row in data
]
global CLASSIFICATION_CACHE
CLASSIFICATION_CACHE = data.copy()
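        # Note: CLASSIFICATION_CACHE is only written here and never read elsewhere,
        # so at the moment it mainly serves as a debugging snapshot of the loaded table.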
return data, columns, tooltip_data, dropdown
except Exception as e:
print(f"[Error] 加载分类数据失败: {e}")
return [], [], [], {}
@app.callback(
Output('classification-table', 'data', allow_duplicate=True),
[Input('confirm-category-modal', 'n_clicks'),
Input('confirm-code-modal', 'n_clicks')],
[State('classification-table', 'data'),
     State('classification-table', 'selected_rows'),   # indices of the checked rows
State('modal-category-dropdown', 'value'),
State('modal-code-input', 'value')],
prevent_initial_call=True
)
def apply_batch_changes(nc1, nc2, data, selected_rows, new_category, new_code):
ctx = callback_context
if not ctx.triggered or not data:
raise PreventUpdate
btn = ctx.triggered[0]['prop_id'].split('.')[0]
    # If no rows are selected, apply the change to every row
    if not selected_rows:
        selected_indices = range(len(data))
    else:
        selected_indices = selected_rows
if btn == 'confirm-category-modal' and new_category:
for i in selected_indices:
data[i]['条目分类'] = new_category
elif btn == 'confirm-code-modal':
final_code = new_code if new_code else "ALL"
for i in selected_indices:
data[i]['项目编码'] = final_code
return data
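# The table edits above only live in the browser; load_classification_data looks for a
# classification.xlsx next to the uploaded file, and the layout already defines the unused
# dcc.Store(id='stored-classification-edited'). A minimal sketch of persisting the edits could
# look like the callback below (the auto-save behaviour and the reuse of that file name are
# assumptions, not part of the original design):
@app.callback(
    Output('stored-classification-edited', 'data'),
    Input('classification-table', 'data'),
    State('stored-data-path', 'data'),
    prevent_initial_call=True
)
def persist_classification(rows, file_path):
    """Write the current classification table back to classification.xlsx (sketch)."""
    if not rows or not file_path:
        raise PreventUpdate
    class_file = os.path.join(os.path.dirname(file_path), "classification.xlsx")
    pd.DataFrame(rows).to_excel(class_file, index=False)
    return get_time_str()  # timestamp of the last save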
@app.callback(
[Output('category-modal', 'is_open'),
Output('code-modal', 'is_open')],
[Input('btn-batch-category', 'n_clicks'),
Input('btn-batch-code', 'n_clicks'),
Input('close-category-modal', 'n_clicks'),
Input('close-code-modal', 'n_clicks'),
Input('confirm-category-modal', 'n_clicks'),
Input('confirm-code-modal', 'n_clicks')],
[State('category-modal', 'is_open'),
State('code-modal', 'is_open')],
prevent_initial_call=True
)
def toggle_modals(nc1, nc2, nc3, nc4, nc5, nc6, is_cat_open, is_code_open):
ctx = callback_context
if not ctx.triggered:
return False, False
btn = ctx.triggered[0]['prop_id'].split('.')[0]
if btn == 'btn-batch-category':
return True, False
elif btn == 'btn-batch-code':
return False, True
elif btn in ['close-category-modal', 'confirm-category-modal']:
return False, is_code_open
elif btn in ['close-code-modal', 'confirm-code-modal']:
return is_cat_open, False
return False, False
@app.callback(
    Output('selection-count', 'children'),
    # selected_rows works without per-row 'id' values, unlike derived_viewport_selected_row_ids
    Input('classification-table', 'selected_rows')
)
def show_selection_count(selected_rows):
    if not selected_rows:
        return "未选中任何行"
    return f"已选中 {len(selected_rows)} 行"
@app.callback(
[Output('analysis-result-table', 'data'),
Output('analysis-result-table', 'columns'),
Output('analysis-result-table', 'tooltip_data'),
Output('result-timestamp', 'children')],
[Input('btn-refresh-result', 'n_clicks'),
Input('main-tabs', 'value')],
[State('stored-data-path', 'data'),
State('sheet-dropdown', 'value'),
State('classification-table', 'data')],
prevent_initial_call=True
)
def run_analysis(n_clicks, tab_value, file_path, sheet_name, classification_data):
global RESULT_CACHE, RESULT_CACHE_TIME
if tab_value != 'tab-analyze' or not file_path:
raise dash.exceptions.PreventUpdate
now = datetime.now()
    # Refresh when the button was clicked or the cached result has expired
do_refresh = (ctx.triggered[0]['prop_id'].split('.')[0] == 'btn-refresh-result')
cache_expired = RESULT_CACHE_TIME is None or (now - RESULT_CACHE_TIME) > CACHE_TTL
if do_refresh or cache_expired:
try:
            # Load the raw data
            pickle_path = file_path + ".pickle"
            sheets_dict = load_from_pickle(pickle_path)
            data_df = sheets_dict[sheet_name]
            # Convert the edited classification rows back into a DataFrame
            class_df = pd.DataFrame(classification_data)
            # Run the analysis
            result_df = site_model(data_df, class_df)
            # Cache the result
            RESULT_CACHE = result_df.copy()
            RESULT_CACHE_TIME = now
except Exception as e:
RESULT_CACHE = pd.DataFrame({"错误": [f"分析失败: {e}"]})
timestamp_str = RESULT_CACHE_TIME.strftime("%Y-%m-%d %H:%M:%S") if RESULT_CACHE_TIME else "无"
columns = [{"name": i, "id": i} for i in RESULT_CACHE.columns]
data = RESULT_CACHE.to_dict('records')
tooltip_data = [
{col: {'value': str(val), 'type': 'markdown'} for col, val in row.items()}
for row in data
]
return data, columns, tooltip_data, f"上次刷新时间: {timestamp_str}"
@app.callback(
Output("download-data", "data"),
Input("btn-export", "n_clicks"),
[State('stored-data-path', 'data'),
State('sheet-dropdown', 'value'),
State('classification-table', 'data'),
State('analysis-result-table', 'data'),
State('analysis-result-table', 'columns')],
prevent_initial_call=True
)
def export_data(n_clicks, file_path, sheet_name, class_data, result_data, result_cols):
if not file_path:
return None
    # Build the output file name
    output_filename = "站点模型分类结果_" + datetime.now().strftime("%Y%m%d_%H%M%S") + ".xlsx"
    # Write the workbook into an in-memory buffer
    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
        # Sheet 1: the raw data
        pickle_path = file_path + ".pickle"
        sheets_dict = load_from_pickle(pickle_path)
        original_df = sheets_dict[sheet_name]
        original_df.to_excel(writer, index=False, sheet_name="原始数据")
        # Sheet 2: the classification table
        class_df = pd.DataFrame(class_data)
        class_df.to_excel(writer, index=False, sheet_name="条目分类")
        # Sheet 3: the analysis result
        result_df = pd.DataFrame(result_data)
        result_df.to_excel(writer, index=False, sheet_name="分析结果")
buffer.seek(0)
return dcc.send_bytes(buffer.getvalue(), output_filename)
# Start the development server
if __name__ == '__main__':
app.run(debug=True, port=8050)
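# debug=True is intended for local development only; for deployment, the exposed
# `server` object can instead be served by a WSGI server, e.g. `gunicorn app:server`
# (assuming this module is saved as app.py).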