php array column,PHP: array_column - Manual

本文介绍了一个 PHP 自定义函数 array_column_recursive 的实现细节,该函数能够递归地从多维数组中提取指定列的数据,并支持使用指定的键进行索引。

This didn't work for me recursively and needed to come up with a solution.

Here's my solution to the function:

if ( ! function_exists( 'array_column_recursive' ) ) {

/**

* Returns the values recursively from columns of the input array, identified by

* the $columnKey.

*

* Optionally, you may provide an $indexKey to index the values in the returned

* array by the values from the $indexKey column in the input array.

*

* @param array $input     A multi-dimensional array (record set) from which to pull

*                         a column of values.

* @param mixed $columnKey The column of values to return. This value may be the

*                         integer key of the column you wish to retrieve, or it

*                         may be the string key name for an associative array.

* @param mixed $indexKey  (Optional.) The column to use as the index/keys for

*                         the returned array. This value may be the integer key

*                         of the column, or it may be the string key name.

*

* @return array

*/

function array_column_recursive( $input = NULL, $columnKey = NULL, $indexKey = NULL ) {

// Using func_get_args() in order to check for proper number of

// parameters and trigger errors exactly as the built-in array_column()

// does in PHP 5.5.

$argc   = func_num_args();

$params = func_get_args();

if ( $argc < 2 ) {

trigger_error( "array_column_recursive() expects at least 2 parameters, {$argc} given", E_USER_WARNING );

return NULL;

}

if ( ! is_array( $params[ 0 ] ) ) {

// Because we call back to this function, check if call was made by self to

// prevent debug/error output for recursiveness :)

$callers = debug_backtrace();

if ( $callers[ 1 ][ 'function' ] != 'array_column_recursive' ){

trigger_error( 'array_column_recursive() expects parameter 1 to be array, ' . gettype( $params[ 0 ] ) . ' given', E_USER_WARNING );

}

return NULL;

}

if ( ! is_int( $params[ 1 ] )

&& ! is_float( $params[ 1 ] )

&& ! is_string( $params[ 1 ] )

&& $params[ 1 ] !== NULL

&& ! ( is_object( $params[ 1 ] ) && method_exists( $params[ 1 ], '__toString' ) )

) {

trigger_error( 'array_column_recursive(): The column key should be either a string or an integer', E_USER_WARNING );

return FALSE;

}

if ( isset( $params[ 2 ] )

&& ! is_int( $params[ 2 ] )

&& ! is_float( $params[ 2 ] )

&& ! is_string( $params[ 2 ] )

&& ! ( is_object( $params[ 2 ] ) && method_exists( $params[ 2 ], '__toString' ) )

) {

trigger_error( 'array_column_recursive(): The index key should be either a string or an integer', E_USER_WARNING );

return FALSE;

}

$paramsInput     = $params[ 0 ];

$paramsColumnKey = ( $params[ 1 ] !== NULL ) ? (string) $params[ 1 ] : NULL;

$paramsIndexKey  = NULL;

if ( isset( $params[ 2 ] ) ) {

if ( is_float( $params[ 2 ] ) || is_int( $params[ 2 ] ) ) {

$paramsIndexKey = (int) $params[ 2 ];

} else {

$paramsIndexKey = (string) $params[ 2 ];

}

}

$resultArray = array();

foreach ( $paramsInput as $row ) {

$key    = $value = NULL;

$keySet = $valueSet = FALSE;

if ( $paramsIndexKey !== NULL && array_key_exists( $paramsIndexKey, $row ) ) {

$keySet = TRUE;

$key    = (string) $row[ $paramsIndexKey ];

}

if ( $paramsColumnKey === NULL ) {

$valueSet = TRUE;

$value    = $row;

} elseif ( is_array( $row ) && array_key_exists( $paramsColumnKey, $row ) ) {

$valueSet = TRUE;

$value    = $row[ $paramsColumnKey ];

}

$possibleValue = array_column_recursive( $row, $paramsColumnKey, $paramsIndexKey );

if ( $possibleValue ) {

$resultArray = array_merge( $possibleValue, $resultArray );

}

if ( $valueSet ) {

if ( $keySet ) {

$resultArray[ $key ] = $value;

} else {

$resultArray[ ] = $value;

}

}

}

return $resultArray;

}

}

(base) PS C:\work\hydraulic_calculation - 2\read_mat> & C:/software/anaconda3/python.exe "c:/work/hydraulic_calculation - 2/read_mat/simple_string_decoder.py" ============================================================ 解析Dymola MAT文件: HeatingSystem_res.mat ============================================================ 解析错误: tuple index out of range 使用原始数据块创建DataFrame DataFrame摘要: 形状: (506, 60) 列数: 60 前5行: var_0 var_1 var_2 var_3 var_4 var_5 var_6 var_7 var_8 var_9 ... var_50 var_51 var_52 var_53 var_54 var_55 var_56 var_57 var_58 var_59 0 0.0 1.0 0.0 -1.0 0.0 0.05 983.2 0.00046 0.5 0.3 ... 100.0 0.910687 0.58284 0.58284 0.910687 0.0 0.0 0.0 2.0 1.0 1 2.0 2.0 0.0 0.0 1.0 0.05 983.2 0.00046 0.5 0.3 ... 100.0 0.910687 0.58284 0.58284 0.910687 0.0 0.0 0.0 2.0 1.0 2 2.0 3.0 0.0 0.0 NaN NaN NaN NaN NaN NaN ... 100.0 0.910687 0.58284 0.58284 0.910687 0.0 0.0 0.0 2.0 1.0 3 2.0 4.0 0.0 0.0 NaN NaN NaN NaN NaN NaN ... 100.0 0.910687 0.58284 0.58284 0.910687 0.0 0.0 0.0 2.0 1.0 4 2.0 5.0 0.0 0.0 NaN NaN NaN NaN NaN NaN ... 100.0 0.910687 0.58284 0.58284 0.910687 0.0 0.0 0.0 2.0 1.0 [5 rows x 60 columns] 识别物理变量类型... 变量类型识别: var_0: temperature var_1: temperature var_4: temperature var_5: flow var_7: flow var_8: temperature var_9: temperature var_11: temperature var_12: temperature var_13: temperature 提取供热系统关键数据... 提取的关键变量: ['node_pressure_1', 'node_pressure_2', 'node_pressure_3', 'flow_rate', 'flow_rate_2', 'flow_rate_3', 'flow_rate_4', 'flow_rate_5', 'flow_rate_6', 'flow_rate_7', 'flow_rate_8', 'flow_rate_9', 'flow_rate_10', 'flow_rate_11', 'flow_rate_12', 'energy_param_1', 'energy_param_2', 'energy_param_3', 'energy_param_4'] 没有回水压力数据?
最新发布
09-13
import numpy as np import pandas as pd import torch import torch.nn as nn import torch.optim as optim from sklearn.preprocessing import MinMaxScaler from sklearn.metrics import mean_squared_error, r2_score from torch.utils.data import DataLoader, TensorDataset import matplotlib.pyplot as plt from tqdm import tqdm # 固定随机种子,保证复现性 torch.manual_seed(42) np.random.seed(42) device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # ------------------------------- 1. 数据加载与预处理 ------------------------------- print("加载数据...") data = pd.read_csv("time_aware_clusters-time step.csv") print("原始数据形状: ", data.shape) seq_len = 90 input_columns = [1, 2, 4] # 控制变量 target_column = 5 # 构造序列数据(包含滞后特征) def create_sequences(df, input_cols, target_col, window_size, lag_steps=3): full_X = df.iloc[:, input_cols].values full_y = df.iloc[:, target_col].values.reshape(-1, 1) full_X[:, 1] = np.log10(full_X[:, 1] + 1e-8) # 滞后特征 y_lagged_list = [] for lag in range(1, lag_steps + 1): y_lag = np.concatenate([np.full((lag, 1), np.nan), full_y[:-lag]]) y_lagged_list.append(y_lag) y_lagged = np.hstack(y_lagged_list) full_X_extended = np.hstack([y_lagged, full_X]) valid_idx = np.arange(lag_steps + window_size - 1, len(full_X_extended)) X, y = [], [] for idx in valid_idx: X.append(full_X_extended[idx - window_size + 1:idx + 1]) y.append(full_y[idx]) X, y = np.array(X), np.array(y) mask = ~np.isnan(X).any(axis=(1, 2)) return X[mask], y[mask] train_data, val_data, test_data = np.split(data, [int(0.8 * len(data)), int(0.9 * len(data))]) X_train, y_train = create_sequences(train_data, input_columns, target_column, seq_len) X_val, y_val = create_sequences(val_data, input_columns, target_column, seq_len) X_test, y_test = create_sequences(test_data, input_columns, target_column, seq_len) # 归一化(滞后特征也归一化) scaler_X = MinMaxScaler().fit(X_train.reshape(-1, X_train.shape[2])) scaler_y = MinMaxScaler().fit(y_train) scaler_u = MinMaxScaler().fit(data.iloc[:, input_columns].values) def normalize(X, scaler): orig_shape = X.shape X_flat = X.reshape(-1, orig_shape[2]) X_norm = scaler.transform(X_flat) return X_norm.reshape(orig_shape) X_train = torch.tensor(normalize(X_train, scaler_X), dtype=torch.float32) y_train = torch.tensor(scaler_y.transform(y_train), dtype=torch.float32) X_val = torch.tensor(normalize(X_val, scaler_X), dtype=torch.float32) y_val = torch.tensor(scaler_y.transform(y_val), dtype=torch.float32) X_test = torch.tensor(normalize(X_test, scaler_X), dtype=torch.float32) # ------------------------------- 2. 改进版模型定义 ------------------------------- class PhysicsLSTM(nn.Module): def __init__(self, input_dim, hidden_dim=121): super().__init__() self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=2, batch_first=True, dropout=0.2) self.fc = nn.Sequential( nn.Linear(hidden_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, 1)) # 改为3层结构 self.physics_net = nn.Sequential( nn.Linear(3, 16), nn.ReLU(), nn.Linear(16, 1)) def forward(self, x): lstm_out, _ = self.lstm(x) pred = self.fc(lstm_out[:, -1, :]) physics = self.physics_net(x[:, -1, -3:]) return pred + 0.1*physics model = PhysicsLSTM(X_train.shape[2]).to(device) model.load_state_dict(torch.load('final_model.pth')) model.eval() # ------------------------------- 3. 改进MPC控制器 ------------------------------- class ImprovedMPCController: def __init__(self, model, scaler_X, scaler_y, scaler_u, control_cols, horizon=15, Q=25, Q_delta=15, Q_integral=5, R=0.8, du_max=0.02): self.model = model self.scaler_X = scaler_X self.scaler_y = scaler_y self.scaler_u = scaler_u self.control_cols = control_cols self.horizon = horizon self.Q = Q self.Q_delta = Q_delta self.Q_integral = Q_integral self.R = R self.du_max = du_max self.u_bounds = np.array([[0.1, 0.1, 0.1], [0.9, 0.9, 0.9]]) self.u_prev = np.array([0.5, 0.5, 0.5]) self.integral_error = 0 self.optimizer = optim.Adam self.lr = 0.1 self.iterations = 200 def batch_predict(self, init_seq, u_seq): seq_batch = np.repeat(init_seq[np.newaxis, :, :], self.horizon, axis=0) preds = [] for t in range(self.horizon): u = np.clip(u_seq[t], self.u_bounds[0], self.u_bounds[1]) seq_batch[:, -1, self.control_cols] = u with torch.no_grad(): pred = self.model(torch.tensor(seq_batch, dtype=torch.float32).to(device)).cpu().numpy() preds.append(pred[:, 0]) if t != self.horizon - 1: seq_batch[:, :-1, :] = seq_batch[:, 1:, :] seq_batch[:, -1, :3] = np.roll(seq_batch[:, -2, :3], -1) seq_batch[:, -1, 2] = pred[:, 0] return np.stack(preds, axis=1).diagonal() def optimize(self, current_seq, target_seq): u_init = np.tile(self.u_prev, (self.horizon, 1)) u_vars = torch.tensor(u_init, dtype=torch.float32, requires_grad=True, device=device) optimizer = self.optimizer([u_vars], lr=self.lr) best_loss = float('inf') best_u = u_init[0].copy() target_tensor = torch.tensor(target_seq[:self.horizon], dtype=torch.float32, device=device) for iter in range(self.iterations): optimizer.zero_grad() # 预测 preds_norm = self.batch_predict(current_seq, u_vars.detach().cpu().numpy()) preds_tensor = torch.tensor(preds_norm, dtype=torch.float32, device=device) # 反归一化 y_pred = torch.tensor(self.scaler_y.inverse_transform(preds_tensor.cpu().numpy().reshape(-1, 1)), dtype=torch.float32, device=device) y_target = torch.tensor(self.scaler_y.inverse_transform(target_tensor.cpu().numpy().reshape(-1, 1)), dtype=torch.float32, device=device) # 计算各项损失 track_loss = torch.mean((y_pred - y_target) ** 2) delta_loss = torch.mean((y_pred[1:] - y_pred[:-1] - (y_target[1:] - y_target[:-1])) ** 2) control_loss = torch.mean((u_vars[1:] - u_vars[:-1]) ** 2) # 积分项 self.integral_error += (y_pred[0] - y_target[0]).item() integral_loss = torch.tensor(self.integral_error ** 2, device=device) # 综合损失函数 loss = (self.Q * track_loss + self.Q_delta * delta_loss + self.R * control_loss + self.Q_integral * integral_loss) loss.backward() optimizer.step() # 约束处理 with torch.no_grad(): # 控制变量平滑 for t in range(1, self.horizon): delta_u = u_vars[t] - u_vars[t - 1] delta_u = torch.clamp(delta_u, -self.du_max, self.du_max) u_vars[t] = u_vars[t - 1] + delta_u # 边界约束 u_vars.data = torch.clamp(u_vars, 0.1, 0.9) # 更新最佳控制 if loss.item() < best_loss: best_loss = loss.item() best_u = u_vars[0].detach().cpu().numpy() self.u_prev = best_u.copy() return best_u # ------------------------------- 4. 改进目标轨迹生成 ------------------------------- def generate_smooth_target(length, start_temp=552.7, end_temp=553.5, max_change=0.15): x = np.linspace(0, 1, length) # 使用S型曲线过渡 transition = 1 / (1 + np.exp(-10 * (x - 0.5))) raw_target = start_temp + (end_temp - start_temp) * transition # 限制最大变化率 for i in range(1, len(raw_target)): delta = raw_target[i] - raw_target[i - 1] if abs(delta) > max_change: raw_target[i] = raw_target[i - 1] + np.sign(delta) * max_change return scaler_y.transform(raw_target.reshape(-1, 1)).flatten() # ------------------------------- 5. 改进可视化 ------------------------------- def plot_improved_results(target_actual, preds_actual, controls_actual): plt.figure(figsize=(16, 8)) # 主图 - 温度跟踪 plt.subplot(2, 1, 1) plt.plot(target_actual, 'r--', linewidth=2, label='目标轨迹') plt.plot(preds_actual, 'b-', linewidth=1.5, label='MPC预测') # 计算并绘制误差带 error = np.abs(preds_actual.flatten() - target_actual.flatten()) plt.fill_between(range(len(preds_actual)), preds_actual.flatten() - error, preds_actual.flatten() + error, color='blue', alpha=0.1) plt.ylabel('温度 (K)') plt.title('改进后的温度控制效果') plt.legend() plt.grid(True, linestyle='--', alpha=0.7) # 控制变量图 plt.subplot(2, 1, 2) for i in range(controls_actual.shape[1]): plt.plot(controls_actual[:, i], label=f'控制变量 {i + 1}') plt.xlabel('时间步') plt.ylabel('控制量') plt.legend() plt.grid(True, linestyle='--', alpha=0.7) plt.tight_layout() plt.show() # ------------------------------- 6. 主控制仿真程序 ------------------------------- def run_improved_mpc_simulation(): control_cols = [3, 4, 5] # 根据实际数据调整 mpc = ImprovedMPCController(model, scaler_X, scaler_y, scaler_u, control_cols) current_seq = X_test[0].numpy().copy() preds, controls = [], [] steps = min(200, len(X_test) - mpc.horizon) target_seq = generate_smooth_target(steps + mpc.horizon) for i in tqdm(range(steps), desc="MPC控制进度"): target = target_seq[i:i + mpc.horizon] try: u_opt_norm = mpc.optimize(current_seq, target) pred_norm = mpc.batch_predict(current_seq, np.tile(u_opt_norm, (mpc.horizon, 1)))[0] except Exception as e: print(f"第{i}步出错: {str(e)}") break preds.append(pred_norm) controls.append(u_opt_norm) new_seq = current_seq[1:] new_row = current_seq[-1].copy() new_row[:3] = np.roll(new_row[:3], -1) new_row[2] = pred_norm new_row[control_cols] = u_opt_norm current_seq = np.vstack([new_seq, new_row]) preds_actual = scaler_y.inverse_transform(np.array(preds).reshape(-1, 1)) controls_actual = scaler_u.inverse_transform(np.array(controls)) target_actual = scaler_y.inverse_transform(target_seq[:steps].reshape(-1, 1)) r2 = r2_score(target_actual, preds_actual) rmse = np.sqrt(mean_squared_error(target_actual, preds_actual)) print(f"\n最终R²: {r2:.4f}, RMSE: {rmse:.4f}") # 改进的可视化 plot_improved_results(target_actual, preds_actual, controls_actual) # ------------------------------- 运行 ------------------------------- if __name__ == "__main__": run_improved_mpc_simulation()修改代码,加强其运行效果
06-17
import math from datetime import datetime import numpy as np import os import matplotlib.pyplot as plt import matplotlib.font_manager as fm from collections import Counter # --- 字体和样式配置 --- def configure_fonts_and_styles(): chinese_font_name = None try: if os.name == 'nt': test_font = 'SimSun' try: fm.FontProperties(family=test_font).get_name(); chinese_font_name = test_font except RuntimeError: print(f"警告:Windows系统中未直接找到 {test_font} 字体,尝试 'Microsoft YaHei' 或 'SimHei'。") fallback_fonts = ['Microsoft YaHei', 'SimHei'] for f_name in fallback_fonts: try: fm.FontProperties(family=f_name).get_name(); chinese_font_name = f_name; break except RuntimeError: continue elif os.name == 'posix': common_fonts = ['SimSun', 'Songti SC', 'STSong', 'WenQuanYi Micro Hei', 'Noto Serif CJK SC', 'Noto Sans CJK SC'] for f_name in common_fonts: try: if fm.FontProperties(family=f_name).get_name(): chinese_font_name = f_name; break except RuntimeError: continue if chinese_font_name: plt.rcParams['font.family'] = chinese_font_name print(f"中文字体尝试设置为: {chinese_font_name}") else: print("警告:未能自动配置一个特定的中文字体 (如 SimSun - 宋体)。中文可能无法正确显示。") except Exception as e: print(f"设置中文字体时出错: {e}") try: current_serif = plt.rcParams.get('font.serif', []) if 'Times New Roman' not in current_serif: try: fm.FontProperties(family='Times New Roman').get_name() plt.rcParams['font.serif'] = ['Times New Roman'] + current_serif print(f"英文衬线字体 (serif) 将优先尝试: Times New Roman") except RuntimeError: print("警告:系统中未找到 'Times New Roman' 字体。英文衬线字体可能使用默认设置。") except Exception as e: print(f"设置Times New Roman时出错: {e}") font_size = 14 plt.rcParams['font.size'] = font_size; plt.rcParams['axes.titlesize'] = font_size + 4 plt.rcParams['axes.labelsize'] = font_size + 2; plt.rcParams['xtick.labelsize'] = font_size plt.rcParams['ytick.labelsize'] = font_size; plt.rcParams['legend.fontsize'] = font_size # Adjusted legend fontsize plt.rcParams['figure.titlesize'] = font_size + 6; plt.rcParams['axes.unicode_minus'] = False print(f"全局字体大小已设置为基础: {font_size}pt") # --- 配置项 --- BASE_FILE_PATH = r"C:\Users\huang\Desktop\1112W\1112W" CHARGING_DATA_FILE = os.path.join(BASE_FILE_PATH, "Charging_Data.csv") OUTPUT_DIR = "visualizations_layout_adjusted" # 新的输出文件夹 if not os.path.exists(OUTPUT_DIR): os.makedirs(OUTPUT_DIR) CHARGING_COLUMN_MAPPINGS = { 'power': 'Transaction power/kwh', 'start_time': 'Start Time', 'end_time': 'End Time', 'service_fee': 'Service charge/Yuan', 'end_cause': 'end_cause', 'temperature_direct': 'Temperature(℃)', 'district_charging': 'District Name' } # --- CSV读取与解析辅助函数 --- def read_csv(filepath, column_selection_map=None): data = []; header = [] try: with open(filepath, 'r', encoding='utf-8-sig') as f: lines = f.readlines() if not lines: return [], [] header = [h.strip().replace('"', '') for h in lines[0].strip().split(',')] col_indices = {internal_key: header.index(actual_col_name) if actual_col_name in header else -1 for internal_key, actual_col_name in column_selection_map.items()} if column_selection_map else \ {h: i for i, h in enumerate(header)} for line in lines[1:]: if not line.strip(): continue values = [v.strip().replace('"', '') for v in line.split(',')] if len(values) != len(header): continue row_dict = {internal_key: values[idx] if idx != -1 and idx < len(values) else None for internal_key, idx in col_indices.items()} data.append(row_dict) except FileNotFoundError: print(f"错误:文件 {filepath} 未找到。"); return [], [] except Exception as e: print(f"读取CSV文件 {filepath} 时发生错误: {e}"); return [], [] return data, header def parse_datetime_custom(dt_str, context=""): if not dt_str: return None dt_str_cleaned = dt_str.strip() formats = ["%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M", "%m/%d/%Y %H:%M", "%Y-%m-%d %H:%M", "%Y/%m/%d %H:%M:%S", "%Y%m%d", "%Y-%m-%d"] for fmt in formats: try: return datetime.strptime(dt_str_cleaned, fmt) except ValueError: continue return None # --- 统计计算辅助函数 --- def calculate_mean(data_list): valid_data = [x for x in data_list if x is not None and isinstance(x, (int, float))] if not valid_data: return 0.0 return sum(valid_data) / len(valid_data) def calculate_std_dev(data_list, ddof=1): valid_data_float = [] for x in data_list: if x is not None: try: valid_data_float.append(float(x)) except ValueError: pass if len(valid_data_float) < max(2, ddof): return 0.0 mean = calculate_mean(valid_data_float) variance = sum([(x - mean) ** 2 for x in valid_data_float]) / (len(valid_data_float) - ddof) return math.sqrt(variance) def calculate_median(data_list): valid_data = [x for x in data_list if x is not None and isinstance(x, (int, float))] if not valid_data: return 0.0 sorted_list = sorted(valid_data) n = len(sorted_list) mid = n // 2 if n % 2 == 1: return sorted_list[mid] else: return (sorted_list[mid - 1] + sorted_list[mid]) / 2.0 def calculate_pearson_correlation(list1, list2): paired_values = [] for x, y in zip(list1, list2): if x is not None and y is not None: try: val_x = float(x); val_y = float(y) paired_values.append((val_x, val_y)) except ValueError: continue if len(paired_values) < 2: return 0.0 x_vals = [p[0] for p in paired_values]; y_vals = [p[1] for p in paired_values] n = len(x_vals) if n == 0 : return 0.0 mean1, mean2 = calculate_mean(x_vals), calculate_mean(y_vals) numerator = sum([(x - mean1) * (y - mean2) for x, y in zip(x_vals, y_vals)]) sum_sq_diff1 = sum([(x - mean1) ** 2 for x in x_vals]) sum_sq_diff2 = sum([(y - mean2) ** 2 for y in y_vals]) denominator = math.sqrt(sum_sq_diff1 * sum_sq_diff2) if denominator == 0: return 0.0 return numerator / denominator # --- 手动KDE相关函数 --- def gaussian_kernel(u): return (1 / np.sqrt(2 * np.pi)) * np.exp(-0.5 * u**2) def calculate_kde_bandwidth(data_points_np): n = len(data_points_np) if n == 0: return 0.1 std_dev = calculate_std_dev(list(data_points_np), ddof=1) if std_dev == 0: q75, q25 = np.percentile(data_points_np, [75, 25]) iqr = q75 - q25 A = iqr / 1.349 if iqr > 0 else ( (0.01 * (data_points_np.max() - data_points_np.min())) if data_points_np.max() != data_points_np.min() else 0.01 ) if A <= 1e-5 : A = 0.01 return 0.9 * A * (n ** (-0.2)) return 1.06 * std_dev * (n ** (-0.2)) def manual_kde(data_points_np, x_eval_points_np, bandwidth): n_data = len(data_points_np) if n_data == 0 or bandwidth <= 1e-6: return np.zeros_like(x_eval_points_np) data_points_np = np.asarray(data_points_np).reshape(-1, 1) x_eval_points_np = np.asarray(x_eval_points_np).reshape(1, -1) u = (x_eval_points_np - data_points_np) / bandwidth kernel_values = gaussian_kernel(u) kde_y = np.sum(kernel_values, axis=0) / (n_data * bandwidth) return kde_y # --- 数据处理主函数 --- def process_data(): print("--- 正在加载和处理数据 ---") charging_data_raw, _ = read_csv(CHARGING_DATA_FILE, CHARGING_COLUMN_MAPPINGS) if not charging_data_raw: print("错误:充电数据未能加载或为空。"); return [], {} num_raw_charging_records = len(charging_data_raw) cleaned_data = [] power_zero_skipped = 0; parse_errors_time_start = 0; parse_errors_time_end = 0 value_errors_power = 0; time_logic_errors = 0; missing_power=0; missing_start_time=0; missing_end_time=0 value_errors_fee=0; temp_parse_errors=0 for record in charging_data_raw: power_str = record.get('power'); start_time_str = record.get('start_time'); end_time_str = record.get('end_time') service_fee_str = record.get('service_fee'); end_cause_val = record.get('end_cause', "未知原因") temp_direct_str = record.get('temperature_direct'); district_name_val = record.get('district_charging', "未知区域") if power_str is None: missing_power+=1; continue if start_time_str is None: missing_start_time+=1; continue if end_time_str is None: missing_end_time+=1; continue try: transaction_power = float(power_str) except (ValueError, TypeError): value_errors_power+=1; continue if transaction_power == 0: power_zero_skipped += 1; continue start_time = parse_datetime_custom(start_time_str) if not start_time: parse_errors_time_start+=1; continue end_time = parse_datetime_custom(end_time_str) if not end_time: parse_errors_time_end+=1; continue if end_time < start_time: time_logic_errors+=1; continue duration_hours = (end_time - start_time).total_seconds() / 3600.0 try: service_fee = float(service_fee_str) if service_fee_str else 0.0 except (ValueError, TypeError): service_fee = 0.0; value_errors_fee+=1 final_temperature = None if temp_direct_str is not None and temp_direct_str != "": try: final_temperature = float(temp_direct_str) except (ValueError, TypeError): temp_parse_errors+=1 cleaned_data.append({ 'transaction_power': transaction_power, 'duration_hours': duration_hours, 'district_name': district_name_val, 'service_fee': service_fee, 'end_cause': end_cause_val, 'temperature': final_temperature }) print(f"\n数据清洗和转换完成。原始充电记录: {num_raw_charging_records}") if missing_power > 0: print(f" - 因缺少功率数据跳过: {missing_power} 条") print(f" - 因功率为0跳过: {power_zero_skipped} 条") # ... (可以添加其他错误计数打印) print(f"最终得到 {len(cleaned_data)} 条有效记录用于分析。") district_stats_aggregated = {} temp_district_data = {} for record in cleaned_data: district = record['district_name'] if district not in temp_district_data: temp_district_data[district] = {'powers': [], 'durations': [], 'service_fees': []} if record['transaction_power'] is not None: temp_district_data[district]['powers'].append(record['transaction_power']) if record['duration_hours'] is not None: temp_district_data[district]['durations'].append(record['duration_hours']) if record['service_fee'] is not None: temp_district_data[district]['service_fees'].append(record['service_fee']) for district, data in temp_district_data.items(): district_stats_aggregated[district] = { 'avg_power': calculate_mean(data['powers']), 'std_dev_duration': calculate_std_dev(data['durations']), 'median_service_fee': calculate_median(data['service_fees']), 'count': len(data['powers']) } return cleaned_data, district_stats_aggregated # --- 可视化函数 --- def plot_transaction_power_distribution(cleaned_data, output_dir): if not cleaned_data: return transaction_powers_list = [r['transaction_power'] for r in cleaned_data if r['transaction_power'] is not None and r['transaction_power'] > 0] if not transaction_powers_list: print("无有效的正充电量数据用于绘制分布图。"); return transaction_powers_np = np.array(transaction_powers_list) fig, axes = plt.subplots(1, 2, figsize=(16, 7)) # 略微调整figsize # 子图1: 密度直方图与KDE曲线 ax = axes[0] ax.hist(transaction_powers_np, bins=50, density=True, alpha=0.6, color='skyblue', edgecolor='gray', label='密度直方图') if len(transaction_powers_np) > 1: bandwidth = calculate_kde_bandwidth(transaction_powers_np) if bandwidth > 1e-5 : plot_min = transaction_powers_np.min() - 1 * bandwidth plot_max = transaction_powers_np.max() + 1 * bandwidth x_range = np.linspace(max(0, plot_min), plot_max, 500) kde_y = manual_kde(transaction_powers_np, x_range, bandwidth) ax.plot(x_range, kde_y, color='darkviolet', linewidth=2.5, label='KDE曲线') ax.set_title('充电量分布 (直方图与KDE)'); ax.set_xlabel('充电量 (kWh)'); ax.set_ylabel('密度') ax.legend(); ax.grid(axis='y', alpha=0.75); ax.set_xlim(left=0) # 子图2: 箱线图 ax = axes[1] bp = ax.boxplot(transaction_powers_np, vert=False, widths=0.7, patch_artist=True, boxprops=dict(facecolor='lightgreen', color='black'), medianprops=dict(color='red', linewidth=2)) ax.set_title('充电量分布 (箱线图)'); ax.set_xlabel('充电量 (kWh)'); ax.set_yticklabels([]) ax.grid(axis='x', alpha=0.75); ax.set_xlim(left=0) fig.suptitle('充电量 Transaction Power 分布概览', fontweight='bold') fig.tight_layout(rect=[0, 0.05, 1, 0.93]) # rect=[left, bottom, right, top] save_path = os.path.join(output_dir, "transaction_power_distribution_kde.png") plt.savefig(save_path); print(f"充电量分布图 (含KDE) 已保存至: {save_path}") def plot_power_by_end_cause(cleaned_data, output_dir, top_n=5): if not cleaned_data: return end_cause_data = {} for record in cleaned_data: cause = record['end_cause'] if record['end_cause'] else "未知"; power = record['transaction_power'] if power is None: continue if cause not in end_cause_data: end_cause_data[cause] = [] end_cause_data[cause].append(power) if not end_cause_data: print("无有效的充电终止原因数据用于绘图。"); return cause_counts = Counter({cause: len(powers) for cause, powers in end_cause_data.items()}) common_causes_original = [cause for cause, count in cause_counts.most_common(top_n)] # 确保标签不会过长 common_causes = [ (c[:30] + '...' if len(c) > 30 else c) for c in common_causes_original] plot_data = [end_cause_data[cause_orig] for cause_orig in common_causes_original if cause_orig in end_cause_data] # Use original cause for data lookup if not plot_data: print(f"筛选后(前{top_n}个)无充电终止原因数据用于绘图。"); return fig, ax = plt.subplots(figsize=(max(12, top_n * 1.5), 10)) # 动态宽度,增加高度 bp = ax.boxplot(plot_data, vert=True, patch_artist=True, labels=common_causes, widths=0.6) try: # Matplotlib 3.7+ cmap = plt.colormaps.get_cmap('Pastel2') color_list = cmap(np.linspace(0, 1, len(plot_data))) except AttributeError: # Fallback for older Matplotlib cmap = plt.cm.get_cmap('Pastel2', len(plot_data)) color_list = cmap.colors if hasattr(cmap, 'colors') else [cmap(i) for i in np.linspace(0, 1, len(plot_data))] for patch, color in zip(bp['boxes'], color_list): patch.set_facecolor(color) for median in bp['medians']: median.set_color('black'); median.set_linewidth(2) ax.set_title(f'不同充电终止原因下的充电量分布 (Top {top_n})', fontweight='bold') ax.set_ylabel('充电量 (kWh)'); ax.set_xlabel('充电终止原因') tick_label_fontsize = plt.rcParams['xtick.labelsize'] - (2 if top_n > 5 else 0) # 如果类别多,稍微减小字号 if tick_label_fontsize < 8: tick_label_fontsize = 8 # 最小字号 plt.setp(ax.get_xticklabels(), rotation=30, ha="right", fontsize=tick_label_fontsize) ax.grid(axis='y', linestyle='--', alpha=0.7) # 调整 rect 的 bottom 值,给 x 轴标签更多空间 fig.tight_layout(rect=[0, 0.20 if top_n > 3 else 0.1, 1, 0.95]) # 如果标签多,底部空间给大一点 save_path = os.path.join(output_dir, f"power_by_end_cause_top{top_n}.png") plt.savefig(save_path); print(f"按终止原因分布的充电量图 (Top {top_n}) 已保存至: {save_path}") def plot_temp_vs_power_scatter(cleaned_data, output_dir): if not cleaned_data: return temperatures = [r['temperature'] for r in cleaned_data if r['temperature'] is not None and r['transaction_power'] is not None] powers = [r['transaction_power'] for r in cleaned_data if r['temperature'] is not None and r['transaction_power'] is not None] if not temperatures or not powers or len(temperatures) < 2: print("无足够的有效温度和充电量数据用于绘制散点图。"); return fig, ax = plt.subplots(figsize=(12, 7)) ax.scatter(temperatures, powers, alpha=0.4, edgecolors='w', s=40, c='steelblue') ax.set_title('温度与充电量关系散点图', fontweight='bold'); ax.set_xlabel('温度 (℃)'); ax.set_ylabel('充电量 (kWh)') ax.grid(True, linestyle='--', alpha=0.7); fig.tight_layout() save_path = os.path.join(output_dir, "temperature_vs_power_scatter.png") plt.savefig(save_path); print(f"温度与充电量散点图已保存至: {save_path}") def plot_district_statistics(district_stats, output_dir): if not district_stats: print("无有效的区域统计数据用于绘图。"); return sorted_districts_items = sorted(district_stats.items()) if not sorted_districts_items: print("排序后无区域数据用于绘图。"); return districts = [item[0] for item in sorted_districts_items] avg_powers = [item[1]['avg_power'] for item in sorted_districts_items] std_dev_durations = [item[1]['std_dev_duration'] for item in sorted_districts_items] median_service_fees = [item[1]['median_service_fee'] for item in sorted_districts_items] x = np.arange(len(districts)); width = 0.35 fig_base_width = 8 fig_width = max(fig_base_width, len(districts) * 1.5 + 2) # 动态计算宽度 fig_height = 8 # 增加默认高度 def _plot_bar_chart(ax, x_data, y_data, width, label_text, color, y_label_text, title_text, districts_labels, bar_label_padding=5): rects = ax.bar(x_data, y_data, width, label=label_text, color=color) ax.set_ylabel(y_label_text); ax.set_title(title_text, fontweight='bold') ax.set_xticks(x_data); ax.set_xticklabels(districts_labels, rotation=45, ha="right") ax.legend(loc='upper right'); ax.bar_label(rects, padding=bar_label_padding, fmt='%.2f') # 自动调整y轴上限以确保标签可见 if y_data: max_val_with_label = max(y_data) * 1.15 # 假设标签不会超出bar高度的15% current_top = ax.get_ylim()[1] ax.set_ylim(top=max(current_top, max_val_with_label)) # 图1: 平均充电量 fig1, ax1 = plt.subplots(figsize=(fig_width, fig_height)); _plot_bar_chart(ax1, x, avg_powers, width, '平均充电量', 'cornflowerblue', '平均充电量 (kWh)', '各区域平均充电量', districts) fig1.tight_layout(rect=[0, 0.25, 1, 0.93]) # 增加底部和顶部边距 save_path1 = os.path.join(output_dir, "district_avg_power.png"); plt.savefig(save_path1); print(f"区域平均充电量图已保存至: {save_path1}") # 图2: 充电时长标准差 fig2, ax2 = plt.subplots(figsize=(fig_width, fig_height)); _plot_bar_chart(ax2, x, std_dev_durations, width, '充电时长标准差', 'lightcoral', '充电时长标准差 (小时)', '各区域充电时长标准差', districts) fig2.tight_layout(rect=[0, 0.25, 1, 0.93]) save_path2 = os.path.join(output_dir, "district_duration_stddev.png"); plt.savefig(save_path2); print(f"区域充电时长标准差图已保存至: {save_path2}") # 图3: 服务费中位数 fig3, ax3 = plt.subplots(figsize=(fig_width, fig_height)); _plot_bar_chart(ax3, x, median_service_fees, width, '服务费中位数', 'mediumseagreen', '服务费中位数 (元)', '各区域服务费中位数', districts) fig3.tight_layout(rect=[0, 0.25, 1, 0.93]) save_path3 = os.path.join(output_dir, "district_median_service_fee.png"); plt.savefig(save_path3); print(f"区域服务费中位数图已保存至: {save_path3}") # --- 主函数 --- if __name__ == '__main__': configure_fonts_and_styles() cleaned_data, district_stats_aggregated = process_data() if cleaned_data: print("\n--- 开始生成可视化图表与分析 ---") print("\n--- Pearson 相关系数计算 ---") temperatures_for_corr = [r['temperature'] for r in cleaned_data] powers_for_corr = [r['transaction_power'] for r in cleaned_data] pearson_corr = calculate_pearson_correlation(temperatures_for_corr, powers_for_corr) valid_pairs_count = len([1 for t, p in zip(temperatures_for_corr, powers_for_corr) if t is not None and p is not None and isinstance(t, (int,float)) and isinstance(p, (int,float))]) if valid_pairs_count > 1: print(f"Temperature与Transaction power的Pearson相关系数: {pearson_corr:.4f} (基于 {valid_pairs_count} 个有效数据点对)") else: print("没有足够的有效数据来计算Temperature与Transaction power的Pearson相关系数。") print("\n--- 生成图表 ---") plot_transaction_power_distribution(cleaned_data, OUTPUT_DIR) plot_power_by_end_cause(cleaned_data, OUTPUT_DIR, top_n=5) plot_temp_vs_power_scatter(cleaned_data, OUTPUT_DIR) if district_stats_aggregated: plot_district_statistics(district_stats_aggregated, OUTPUT_DIR) else: print("无区域统计数据,跳过相关图表绘制。") print(f"\n所有图表已尝试保存到 '{OUTPUT_DIR}' 文件夹中。") print("正在尝试显示所有图表...") plt.show() print("所有图表窗口已尝试显示完毕。") else: print("没有清洗后的数据可用于可视化。") print("\n可视化任务结束。")以上代码改写文件路径C:\Users\huang\Desktop\1112W\Charging_Data.csv,计算充电时间(小时) [‘Duration’] = [‘End Time’] -[‘Start Time’]).并进行数据清洗,处理Transaction power=0的异常记录如故障导致无效充电则以上代码能够实现该功能代码是什么
06-02
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值