CURRENT_BONUS(p)

Process bonus calculation
This article describes CURRENT_BONUS(p), the formula the scheduler uses to compute a priority bonus for process p from its average sleep time (sleep_avg): the sleep time is converted from nanoseconds to jiffies, multiplied by the maximum bonus, and divided by the maximum average sleep time.
CURRENT_BONUS(p) derives process p's current bonus from its sleep_avg:

#define CURRENT_BONUS(p) \
    ( NS_TO_JIFFIES((p)->sleep_avg) * MAX_BONUS / MAX_SLEEP_AVG )
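
For context, the constants the macro relies on come from kernel/sched.c in the 2.6 O(1) scheduler; the sketch below reproduces them from memory, so treat the exact expressions as assumptions that may vary with HZ and the kernel version:

/* Supporting definitions (sketch, modeled on kernel/sched.c in 2.6;
 * exact values depend on HZ and the kernel version) */
#define NS_TO_JIFFIES(TIME)  ((unsigned long)(TIME) / (1000000000 / HZ))

#define MAX_USER_PRIO        40      /* simplified: nice -20..19 spans 40 levels */
#define PRIO_BONUS_RATIO     25
#define MAX_BONUS            (MAX_USER_PRIO * PRIO_BONUS_RATIO / 100)  /* = 10 */

#define DEF_TIMESLICE        (100 * HZ / 1000)            /* 100 ms in jiffies */
#define MAX_SLEEP_AVG        (DEF_TIMESLICE * MAX_BONUS)  /* 1000 ms of sleep credit */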

For example, with HZ = 1000 (so MAX_BONUS = 10 and MAX_SLEEP_AVG = 1000 jiffies):
p->sleep_avg = 600 ms (600,000,000 ns)
CURRENT_BONUS(p) = 6
----------------------------------
CURRENT_BONUS(p) --> NS_TO_JIFFIES(600,000,000 ns) * 10 / 1000
                 --> 600 * 10 / 1000 --> 6
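
As a quick sanity check, here is a minimal userspace C sketch (not kernel code) that hard-codes the values assumed in the example, HZ = 1000, MAX_BONUS = 10, and MAX_SLEEP_AVG = 1000 jiffies, and reproduces the result:

/* Minimal userspace sketch: constants hard-coded to match the example above. */
#include <stdio.h>

#define HZ                   1000
#define NS_TO_JIFFIES(TIME)  ((unsigned long)(TIME) / (1000000000 / HZ))
#define MAX_BONUS            10
#define MAX_SLEEP_AVG        1000                        /* jiffies */

#define CURRENT_BONUS(sleep_avg_ns) \
    ( NS_TO_JIFFIES(sleep_avg_ns) * MAX_BONUS / MAX_SLEEP_AVG )

int main(void)
{
    unsigned long long sleep_avg = 600000000ULL;         /* 600 ms, in nanoseconds */
    printf("CURRENT_BONUS = %lu\n", CURRENT_BONUS(sleep_avg));  /* prints 6 */
    return 0;
}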


