chppenv

来旺
于 2023-12-04 09:29:19 发布
阅读量179
点赞数
文章标签：强化学习
本文链接：https://blog.csdn.net/Young_Naive/article/details/134775829
版权
import numpy as np
import math
import random


class TransportMatchingEnv:
    """
    Toy environment that matches trucks (drivers) to goods.

    An action encodes one (driver, good) pair. ``step`` scores the pair with a
    shipper satisfaction term, a driver satisfaction term and a suitability
    term, marks both sides as taken and returns the flattened observation.
    Almost all attributes are random placeholder data drawn with ``np.random``.
    """

    # Minimum length of the two availability vectors. ``reset`` historically
    # allocated exactly 10 entries regardless of fleet size; keeping 10 as a
    # floor leaves the observation length unchanged for small fleets, while
    # growing with the fleet prevents out-of-range indexing for larger ones.
    _MIN_AVAILABILITY_SLOTS = 10

    def __init__(self, num_drivers=3, num_goods=3, max_price=10, max_time=5):
        """
        Store the configuration, draw the random attributes and reset.

        TODO:       1. fix the matching logic
        TODO:       2. add the notion of time
        :param num_drivers: number of trucks
        :param num_goods: number of goods
        :param max_price: exclusive upper bound for the random prices
        :param max_time: exclusive upper bound for the random time preferences
        """

        self.num_drivers = num_drivers
        self.num_goods = num_goods
        self.max_price = max_price
        self.max_time = max_time
        # Time windows
        self.goods_time = [(1, 6), (7, 9), (12, 23)]
        # Goods / truck types
        self.good_type = [1, 2, 3]
        self.car_type = [7, 8, 9]
        # NOTE(review): fixed 3x3 table indexed as [driver_index][good_index];
        # suitability() raises IndexError for indices >= 3.
        self.suitability_matrix = [[1, 2, 1.8],
                                   [3, 1, 0.8],
                                   [4, 2, 1]]
        # Action space: one action per (good, driver) pair
        self.action_dim = self.num_drivers * self.num_goods

        # Price each driver expects; indexed by driver_index in step()
        self.vehicle_expected_prices = np.random.randint(0, self.max_price, self.num_drivers)
        # Goods type vector, currently ten goods types
        self.goods_type = np.random.randint(0, 10, self.num_goods)
        # Vehicle type vector, currently ten vehicle types assumed
        self.vehicle_type = np.random.randint(0, 10, self.num_drivers)
        # Vehicle transport speed
        self.vehicle_speed = np.random.randint(30, 80, self.num_drivers)
        # Waiting time required by the shipper, in minutes; indexed by good_index in step()
        self.wait_time = np.random.randint(0, 120, self.num_goods)
        # Driver service quality score, out of 100
        self.driver_service_quality = np.random.randint(0, 100, self.num_drivers)
        # Maximum waiting time of each driver, random data for now
        self.max_wait_time = np.random.randint(30, 120, self.num_drivers)
        # Maximum distance each truck can travel
        self.max_distance = np.random.randint(50, 101, size=self.num_drivers)
        # Goods matching-degree matrix
        self.match_matrix = np.random.choice([0.3, 0.5, 1], size=(10, 20))
        # Longitude/latitude range of Lanzhou (102.4-104.0 E, 35.5-37.0 N),
        # used to compute distances
        self.longitude_range = [102.4, 104.0]
        self.latitude_range = [35.5, 37.0]

        # Goods coordinates
        self.goods_longitudes = np.random.uniform(self.longitude_range[0], self.longitude_range[1], self.num_goods)
        self.goods_latitudes = np.random.uniform(self.latitude_range[0], self.latitude_range[1], self.num_goods)
        self.goods_coordinates = np.column_stack((self.goods_longitudes, self.goods_latitudes))

        # Vehicle coordinates
        self.drivers_longitudes = np.random.uniform(self.longitude_range[0], self.longitude_range[1], self.num_drivers)
        self.drivers_latitudes = np.random.uniform(self.latitude_range[0], self.latitude_range[1], self.num_drivers)
        self.drivers_coordinates = np.column_stack((self.drivers_longitudes, self.drivers_latitudes))

        # Goods destination coordinates
        self.destination_longitudes = np.random.uniform(self.longitude_range[0], self.longitude_range[1],
                                                        self.num_goods)
        self.destination_latitudes = np.random.uniform(self.latitude_range[0], self.latitude_range[1], self.num_goods)
        self.destination_coordinates = np.column_stack((self.destination_longitudes, self.destination_latitudes))

        # Per-episode attributes (occupation, negotiation matrix, distances,
        # goods preferences, availabilities, ...) are created by reset(). It is
        # called last so that combined_state reflects the final attributes.
        # TODO: the state still needs many more features.
        self.combined_state = self.reset()

    def _build_state(self):
        """Concatenate the observable attributes into one flat vector."""
        return np.concatenate((
            self.current_negotiation.flatten(),
            self.distance_matrix.flatten(),
            self.goods_time_preferences,
            self.goods_expected_prices,
            self.driver_availabilities,
            self.goods_special_requirements,
            self.driver_special_capabilities
        ))

    @staticmethod
    def _bounded_score(weight, deviation, reference):
        """
        Return ``max(0, weight - weight * deviation / reference)``.

        A zero ``reference`` yields 0 instead of dividing by zero; this is the
        value the unguarded formula produced for NumPy inputs (inf/NaN clipped
        by ``max``), now without the warning or a ZeroDivisionError.
        """
        if reference == 0:
            return 0
        return max(0, weight - (weight * deviation / reference))

    def haversine_distance(self, lat1, lon1, lat2, lon2):
        """
        Great-circle distance between two points using the haversine formula.

        :param lat1: latitude of the first point, in decimal degrees
        :param lon1: longitude of the first point, in decimal degrees
        :param lat2: latitude of the second point, in decimal degrees
        :param lon2: longitude of the second point, in decimal degrees
        :return: distance in kilometres
        """
        # Convert decimal degrees to radians
        lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])

        # Haversine formula
        dlat = lat2 - lat1
        dlon = lon2 - lon1
        a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
        # Rounding can push a marginally above 1, which would make sqrt(1 - a) fail
        a = min(1.0, a)
        c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
        r = 6371  # mean Earth radius in kilometres
        return c * r

    def decode_action(self, encoded_action):
        """
        Decode a flat action index into a (driver, good) pair.

        Actions are laid out as ``good_index * num_drivers + driver_index``.

        :param encoded_action: integer in ``[0, num_drivers * num_goods)``
        :return: tuple ``(driver_index, good_index)``
        :raises ValueError: if the action is outside the action space
        """
        action = int(encoded_action)
        if not 0 <= action < self.num_drivers * self.num_goods:
            raise ValueError(
                f"action {action} is outside [0, {self.num_drivers * self.num_goods})")
        good_index, driver_index = divmod(action, self.num_drivers)
        # The price is not part of the action; see compute_price()

        return driver_index, good_index

    def reset(self):
        """
        Reset the per-episode attributes and return the initial observation.

        :return: flat observation vector, also stored in ``self.combined_state``
        """
        # NOTE(review): this seeds only the stdlib RNG. Every draw in this class
        # uses np.random, so it does not make episodes reproducible, and it
        # resets the global ``random`` state for all other users of that module.
        random.seed(0)
        # One occupation flag per truck
        self.occupation = [0] * self.num_drivers
        # Current negotiation state: 1 marks a matched (good, driver) pair
        self.current_negotiation = np.zeros((self.num_goods, self.num_drivers))

        # Refresh the per-episode parameters every time the environment is reset
        # Distance between each good and each truck
        self.distance_matrix = np.random.randint(0, 100, (self.num_goods, self.num_drivers))
        # Arrival time expected by each shipper
        self.goods_time_preferences = np.random.randint(0, self.max_time, self.num_goods)
        # Price expected by each shipper
        self.goods_expected_prices = np.random.randint(0, self.max_price, self.num_goods)

        # Whether each good has special requirements
        self.goods_special_requirements = np.random.choice([0, 1], self.num_goods)
        self.driver_special_capabilities = np.random.choice([0, 1], self.num_drivers)
        # 1 once the driver has been matched
        self.driver_availabilities = np.zeros(max(self._MIN_AVAILABILITY_SLOTS, self.num_drivers))
        # 1 once the good has been picked up
        self.goods_availabilities = np.zeros(max(self._MIN_AVAILABILITY_SLOTS, self.num_goods))

        self.combined_state = self._build_state()
        return self.combined_state

    def driver_satisfaction(self, fee_received, expected_fee, distance_travelled, max_distance, wait_time,
                            max_wait_time):
        """
        Score the driver's satisfaction.

        :param fee_received: fee the driver receives
        :param expected_fee: fee the driver expected
        :param distance_travelled: distance driven for this job
        :param max_distance: maximum distance the truck can travel
        :param wait_time: waiting time for this job
        :param max_wait_time: maximum waiting time of the driver
        :return: total score, at most 100 (price 30 + distance 30 + wait 40)
        """
        price_satisfaction = self._bounded_score(30, abs(fee_received - expected_fee), expected_fee)
        distance_satisfaction = self._bounded_score(30, distance_travelled, max_distance)
        wait_satisfaction = self._bounded_score(40, wait_time, max_wait_time)

        total_satisfaction = price_satisfaction + distance_satisfaction + wait_satisfaction
        return min(total_satisfaction, 100)

    def shipper_satisfaction(self, fee_paid, expected_fee, delivery_time, expected_delivery_time):
        """
        Score the shipper's satisfaction.

        :param fee_paid: fee paid
        :param expected_fee: expected fee
        :param delivery_time: actual transport time
        :param expected_delivery_time: expected transport time
        :return: total score, at most 100 (price 40 + time 60)
        """
        # Price component, weighted at most 40 points
        price_satisfaction = self._bounded_score(40, abs(fee_paid - expected_fee), expected_fee)

        # Time component, weighted at most 60 points
        time_satisfaction = self._bounded_score(60, abs(delivery_time - expected_delivery_time),
                                                expected_delivery_time)

        total_satisfaction = price_satisfaction + time_satisfaction

        # Make sure the total never exceeds 100
        return min(total_satisfaction, 100)

    def suitability(self, driver_index: int, good_index: int):
        """
        Look up the suitability score of a (driver, good) pair.

        :param driver_index: row index into ``suitability_matrix``
        :param good_index: column index into ``suitability_matrix``
        :return: the table entry multiplied by 10
        """
        return self.suitability_matrix[driver_index][good_index] * 10

    def isContinue(self, driver_index, good_index):
        """
        Decide whether a negotiation for this pair may take place.

        :param driver_index: index of the truck
        :param good_index: index of the good
        :return: False if the truck is occupied or already matched, or the good
                 is already taken; True otherwise
        """
        # TODO: presumably the goods_time windows should also be checked against
        # the driver's occupation here.
        driver_busy = self.occupation[driver_index] or self.driver_availabilities[driver_index] == 1
        goods_taken = self.goods_availabilities[good_index] == 1
        return not (driver_busy or goods_taken)

    def compute_price(self, driver_index, good_index):
        """
        Return the price used for this pair.

        :param driver_index: index of the truck (currently unused)
        :param good_index: index of the good
        :return: the shipper's expected price for the good
        """
        # TODO: derive the price from both the driver's and the good's information
        price = self.goods_expected_prices[good_index]
        return price

    def step(self, encoded_action):
        """
        Apply one matching action.

        TODO: add the notion of time.
        :param encoded_action: flat action index, decoded by decode_action();
                               the price is computed internally
        :return: tuple ``(state, reward, done, info)``. ``info`` is an empty dict
                 when the action is skipped and the negotiation matrix otherwise.
        """
        driver_index, good_index = self.decode_action(encoded_action)
        # Price both sides are scored against
        price = self.compute_price(driver_index, good_index)

        # A skipped action yields reward 0  TODO
        reward = 0
        done = np.sum(self.current_negotiation) == self.num_goods

        if done or not self.isContinue(driver_index, good_index):
            # Negotiation already finished, or the pair is not allowed: skip
            return self.combined_state, reward, done, {}

        # Fee expected by the driver; used for both satisfaction scores
        expected_fee = self.vehicle_expected_prices[driver_index]
        # NOTE(review): the vehicle speed is passed as both the actual and the
        # expected delivery time, so the shipper's time component is always 60
        # until the time TODO is implemented.
        delivery_time = self.vehicle_speed[driver_index]
        expected_delivery_time = self.vehicle_speed[driver_index]

        # Distance driven: truck -> goods, plus goods -> destination
        distance_travelled = (self.haversine_distance(self.goods_latitudes[good_index],
                                                      self.goods_longitudes[good_index],
                                                      self.drivers_latitudes[driver_index],
                                                      self.drivers_longitudes[driver_index])
                              + self.haversine_distance(
                    self.goods_latitudes[good_index],
                    self.goods_longitudes[good_index],
                    self.destination_latitudes[good_index],
                    self.destination_longitudes[good_index]))
        max_distance = self.max_distance[driver_index]
        # TODO: waiting times are still to be decided
        wait_time = self.wait_time[good_index]
        max_wait_time = self.max_wait_time[driver_index]

        # Shipper satisfaction
        reward1 = self.shipper_satisfaction(price, expected_fee, delivery_time,
                                            expected_delivery_time)
        # Driver satisfaction
        reward2 = self.driver_satisfaction(price, expected_fee, distance_travelled, max_distance, wait_time,
                                           max_wait_time)
        reward3 = self.suitability(driver_index, good_index)
        reward = reward1 + reward2 + reward3

        if reward > 0:
            # Successful match: mark both sides as taken
            self.driver_availabilities[driver_index] = 1
            self.goods_availabilities[good_index] = 1
            self.current_negotiation[good_index][driver_index] = 1

        # Keep the stored state current so a later skipped action returns it
        self.combined_state = self._build_state()
        done = np.sum(self.current_negotiation) == self.num_goods
        return self.combined_state, reward, done, self.current_negotiation

    def render(self):
        """Print the current negotiation matrix."""
        print(self.current_negotiation)