import numpy as np
import math
import random
class TransportMatchingEnv:
    """Toy matching environment that pairs truck drivers with goods.

    An action encodes one (driver, good) pair.  A successful step marks the
    driver and the good as taken and returns the sum of the shipper
    satisfaction, the driver satisfaction and a type-suitability bonus as the
    reward.  All per-driver / per-good attributes are random placeholder data
    drawn with ``np.random``.

    TODO: 1. fix the matching logic
    TODO: 2. incorporate time into the model
    """

    # ``reset`` historically allocated ten availability slots regardless of
    # the number of drivers.  That vector is part of the observation, so the
    # minimum is kept to leave the observation length of existing
    # configurations unchanged; larger fleets simply get one slot per driver.
    _MIN_AVAILABILITY_SLOTS = 10

    def __init__(self, num_drivers=3, num_goods=3, max_price=10, max_time=5):
        """Create the environment and draw its random placeholder data.

        :param num_drivers: number of trucks
        :param num_goods: number of goods
        :param max_price: exclusive upper bound for the random expected prices
        :param max_time: exclusive upper bound for the random time preferences
        """
        self.num_drivers = num_drivers
        self.num_goods = num_goods
        self.max_price = max_price
        self.max_time = max_time

        # Time windows (not consumed by the reward yet).
        self.goods_time = [(1, 6), (7, 9), (12, 23)]
        # Goods / vehicle type labels and the driver-by-good suitability table.
        self.good_type = [1, 2, 3]
        self.car_type = [7, 8, 9]
        self.suitability_matrix = [[1, 2, 1.8],
                                   [3, 1, 0.8],
                                   [4, 2, 1]]

        # Size of the action space: one action per (good, driver) pair.
        self.action_dim = self.num_drivers * self.num_goods

        # Price each driver expects; indexed by driver, hence one per driver.
        self.vehicle_expected_prices = np.random.randint(0, self.max_price, self.num_drivers)
        # Goods type codes (ten goods types for now).
        self.goods_type = np.random.randint(0, 10, self.num_goods)
        # Vehicle type codes (ten vehicle types assumed for now).
        self.vehicle_type = np.random.randint(0, 10, self.num_drivers)
        # Vehicle transport speed.
        self.vehicle_speed = np.random.randint(30, 80, self.num_drivers)
        # Waiting time required by the shipper, in minutes; indexed by good.
        self.wait_time = np.random.randint(0, 120, self.num_goods)
        # Driver service-quality score, out of 100.
        self.driver_service_quality = np.random.randint(0, 100, self.num_drivers)
        # Longest time a driver is willing to wait (random data for now).
        self.max_wait_time = np.random.randint(30, 120, self.num_drivers)
        # Longest distance a truck can travel.
        self.max_distance = np.random.randint(50, 101, size=self.num_drivers)
        # Goods matching-degree matrix.
        self.match_matrix = np.random.choice([0.3, 0.5, 1], size=(10, 20))

        # Longitude / latitude box for Lanzhou (102.4-104.0 E, 35.5-37.0 N),
        # used to draw positions for the distance computation.
        self.longitude_range = [102.4, 104.0]
        self.latitude_range = [35.5, 37.0]
        lon_low, lon_high = self.longitude_range
        lat_low, lat_high = self.latitude_range

        # Pick-up position of every good.
        self.goods_longitudes = np.random.uniform(lon_low, lon_high, self.num_goods)
        self.goods_latitudes = np.random.uniform(lat_low, lat_high, self.num_goods)
        self.goods_coordinates = np.column_stack(
            (self.goods_longitudes, self.goods_latitudes))
        # Current position of every vehicle.
        self.drivers_longitudes = np.random.uniform(lon_low, lon_high, self.num_drivers)
        self.drivers_latitudes = np.random.uniform(lat_low, lat_high, self.num_drivers)
        self.drivers_coordinates = np.column_stack(
            (self.drivers_longitudes, self.drivers_latitudes))
        # Destination of every good.
        self.destination_longitudes = np.random.uniform(lon_low, lon_high, self.num_goods)
        self.destination_latitudes = np.random.uniform(lat_low, lat_high, self.num_goods)
        self.destination_coordinates = np.column_stack(
            (self.destination_longitudes, self.destination_latitudes))

        # Episode state (negotiation matrix, availabilities, per-episode
        # random data).  Done last so that ``combined_state`` matches the
        # attributes it was built from.
        self.combined_state = self.reset()

    @staticmethod
    def _weighted_score(weight, deviation, scale):
        """Return ``weight - weight * deviation / scale`` floored at zero.

        A zero ``scale`` would divide by zero; in that case the full weight
        is granted when there is no deviation and nothing otherwise.
        """
        if scale == 0:
            return float(weight) if deviation == 0 else 0.0
        return float(max(0, weight - weight * deviation / scale))

    def _build_state(self):
        """Concatenate the observable attributes into one flat vector."""
        return np.concatenate((
            self.current_negotiation.flatten(),
            self.distance_matrix.flatten(),
            self.goods_time_preferences,
            self.goods_expected_prices,
            self.driver_availabilities,
            self.goods_special_requirements,
            self.driver_special_capabilities,
        ))

    def _all_matched(self):
        """Return True once no further (driver, good) pair can be formed."""
        # Every driver and every good is used at most once, so the number of
        # matches is bounded by the smaller of the two counts.
        target = min(self.num_goods, self.num_drivers)
        return bool(np.sum(self.current_negotiation) == target)

    def haversine_distance(self, lat1, lon1, lat2, lon2):
        """Great-circle distance between two points via the haversine formula.

        :param lat1: latitude of the first point, decimal degrees
        :param lon1: longitude of the first point, decimal degrees
        :param lat2: latitude of the second point, decimal degrees
        :param lon2: longitude of the second point, decimal degrees
        :return: distance in kilometres
        """
        # Convert decimal degrees to radians.
        lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])
        dlat = lat2 - lat1
        dlon = lon2 - lon1
        a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
        # Rounding can push ``a`` marginally outside [0, 1], which would make
        # the square roots below fail.
        a = min(1.0, max(0.0, a))
        c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
        r = 6371  # mean Earth radius in kilometres
        return c * r

    def decode_action(self, encoded_action):
        """Decode a flat action index into its (driver, good) pair.

        Actions are laid out as ``good_index * num_drivers + driver_index``.

        :param encoded_action: integer in ``[0, num_drivers * num_goods)``
        :return: tuple ``(driver_index, good_index)``
        :raises IndexError: if the action lies outside the action space
        """
        action = int(encoded_action)
        if not 0 <= action < self.num_drivers * self.num_goods:
            raise IndexError(f"action {action} is outside the action space")
        good_index, driver_index = divmod(action, self.num_drivers)
        return driver_index, good_index

    def reset(self):
        """Start a new episode and return the initial observation.

        Clears the negotiation matrix and the availability flags and redraws
        the per-episode random data.  The returned vector is also stored in
        ``self.combined_state``.

        :return: flat observation vector
        """
        # NOTE(review): this seeds only the stdlib ``random`` module; every
        # draw below uses ``np.random`` and is therefore not made reproducible
        # by it.
        random.seed(0)
        self.occupation = [0] * self.num_drivers
        self.current_negotiation = np.zeros((self.num_goods, self.num_drivers))
        # Refresh the per-episode parameters on every reset.
        self.distance_matrix = np.random.randint(0, 100, (self.num_goods, self.num_drivers))
        self.goods_time_preferences = np.random.randint(0, self.max_time, self.num_goods)
        self.goods_expected_prices = np.random.randint(0, self.max_price, self.num_goods)
        self.goods_special_requirements = np.random.choice([0, 1], self.num_goods)
        self.driver_special_capabilities = np.random.choice([0, 1], self.num_drivers)
        # 1 marks a driver that is already matched.
        self.driver_availabilities = np.zeros(
            max(self.num_drivers, self._MIN_AVAILABILITY_SLOTS))
        # 1 marks a good that has already been taken.
        self.goods_availabilities = np.zeros(self.num_goods)
        self.combined_state = self._build_state()
        return self.combined_state

    def driver_satisfaction(self, fee_received, expected_fee, distance_travelled, max_distance, wait_time,
                            max_wait_time):
        """Score the driver's satisfaction with a deal.

        Price counts for up to 30 points, distance for up to 30 and waiting
        time for up to 40.

        :param fee_received: fee the driver receives
        :param expected_fee: fee the driver expected
        :param distance_travelled: distance the driver has to cover
        :param max_distance: longest distance the driver accepts
        :param wait_time: time the driver has to wait
        :param max_wait_time: longest waiting time the driver accepts
        :return: total score, capped at 100
        """
        price_satisfaction = self._weighted_score(30, abs(fee_received - expected_fee), expected_fee)
        distance_satisfaction = self._weighted_score(30, distance_travelled, max_distance)
        wait_satisfaction = self._weighted_score(40, wait_time, max_wait_time)
        total_satisfaction = price_satisfaction + distance_satisfaction + wait_satisfaction
        return min(total_satisfaction, 100)

    def shipper_satisfaction(self, fee_paid, expected_fee, delivery_time, expected_delivery_time):
        """Score the shipper's satisfaction with a deal.

        Price counts for up to 40 points and delivery time for up to 60.

        :param fee_paid: fee the shipper pays
        :param expected_fee: fee the shipper expected
        :param delivery_time: actual transport time
        :param expected_delivery_time: expected transport time
        :return: total score, capped at 100
        """
        price_satisfaction = self._weighted_score(40, abs(fee_paid - expected_fee), expected_fee)
        time_satisfaction = self._weighted_score(
            60, abs(delivery_time - expected_delivery_time), expected_delivery_time)
        total_satisfaction = price_satisfaction + time_satisfaction
        return min(total_satisfaction, 100)

    def suitability(self, driver_index: int, good_index: int):
        """Return the type-suitability bonus of a (driver, good) pair.

        The lookup wraps around the suitability table so environments larger
        than the table reuse its entries instead of raising.

        :param driver_index: index of the driver
        :param good_index: index of the good
        :return: table entry multiplied by 10
        """
        table = self.suitability_matrix
        row = table[driver_index % len(table)]
        return row[good_index % len(row)] * 10

    def isContinue(self, driver_index, good_index):
        """Tell whether a negotiation between this pair may take place.

        :return: False if the driver is occupied or already matched, or the
            good has already been taken; True otherwise
        """
        blocked = (self.occupation[driver_index]
                   or self.driver_availabilities[driver_index] == 1
                   or self.goods_availabilities[good_index] == 1)
        return not blocked

    def compute_price(self, driver_index, good_index):
        """Return the price for a (driver, good) pair.

        Currently this is just the shipper's expected price for the good.
        TODO: derive the price from both the driver's and the good's data.

        :return: negotiated price
        """
        return self.goods_expected_prices[good_index]

    def step(self, encoded_action):
        """Attempt to match the (driver, good) pair encoded by the action.

        TODO: incorporate time into the model.

        :param encoded_action: flat action index, see ``decode_action``
        :return: tuple ``(state, reward, done, info)``.  When the episode is
            already finished or the pair is not available, the current state
            is returned with reward 0 and an empty dict as ``info``;
            otherwise ``info`` is the negotiation matrix.
        """
        driver_index, good_index = self.decode_action(encoded_action)

        done = self._all_matched()
        if done or not self.isContinue(driver_index, good_index):
            # Nothing to negotiate: finished episode or pair unavailable.
            return self.combined_state, 0, done, {}

        price = self.compute_price(driver_index, good_index)
        # Fee expected by the driver.
        expected_fee = self.vehicle_expected_prices[driver_index]
        speed = self.vehicle_speed[driver_index]

        # Distance travelled: driver -> pick-up point -> destination.
        pickup_leg = self.haversine_distance(self.goods_latitudes[good_index],
                                             self.goods_longitudes[good_index],
                                             self.drivers_latitudes[driver_index],
                                             self.drivers_longitudes[driver_index])
        delivery_leg = self.haversine_distance(self.goods_latitudes[good_index],
                                               self.goods_longitudes[good_index],
                                               self.destination_latitudes[good_index],
                                               self.destination_longitudes[good_index])
        distance_travelled = pickup_leg + delivery_leg

        # NOTE(review): the shipper is scored against the driver's expected
        # fee, and the vehicle speed is passed as both the actual and the
        # expected delivery time, so the time component is always at its
        # maximum.  Kept as is until time is modelled -- confirm intent.
        shipper_score = self.shipper_satisfaction(price, expected_fee, speed, speed)
        # TODO: the waiting-time inputs are still provisional.
        driver_score = self.driver_satisfaction(price, expected_fee, distance_travelled,
                                                self.max_distance[driver_index],
                                                self.wait_time[good_index],
                                                self.max_wait_time[driver_index])
        reward = shipper_score + driver_score + self.suitability(driver_index, good_index)

        if reward > 0:
            # Deal accepted: both parties leave the pool.
            self.driver_availabilities[driver_index] = 1
            self.goods_availabilities[good_index] = 1
            self.current_negotiation[good_index][driver_index] = 1

        self.combined_state = self._build_state()
        return self.combined_state, reward, self._all_matched(), self.current_negotiation

    def render(self):
        """Print the current negotiation matrix."""
        print(self.current_negotiation)
# 11-09
# 11-09