强化学习
对银行借贷进行建模:企业先确定每个时期的累计资金需求量,然后在与银行协商的过程中,由银行给出贷款利率,企业再确定实际贷款金额。该模型主要针对银行借贷业务频繁、经营状况良好且拥有一定资产的大型企业。对于这类企业,在银行资金充裕的情况下,基本上都可以借到款项。企业每个时期都有一定的资金需求,而贷款时间越长,贷款利率越高,故企业会充分考虑资金的时间价值,尽量缩短贷款时间,避免不必要的资金成本。假设企业通过合理的时间划分,实现在每个时段初贷款、在该时段末还款。使用强化学习进行求解,相关部分代码如下:
构建环境
class LoanEnv(gym.Env):
def __init__(self, company_num=20, max_episode_steps=2):
    """Set up the pricing parameters and the Gym spaces of the environment.

    Args:
        company_num: Number of borrowing companies. It is also the length
            of the action vector (one interest rate per company).
        max_episode_steps: Episode-length limit stored on the instance.
    """
    super().__init__()

    # Episode bookkeeping. These start empty and are filled in outside
    # this constructor.
    self.max_episode_steps = max_episode_steps
    self.current_steps = None
    self.current_cost_period = None
    # Read by need_function(), get_state() and company_loss(); initialised
    # here so the attribute always exists.
    # NOTE(review): presumably populated by reset() -- confirm.
    self.state = None

    # Cost coefficients; the non-constant one is drawn in cost_period().
    self.cost_nonconstant_variable = None
    self.cost_constant_variable = 0.002
    self.loss_balance_factor = 0.6

    # Demand-model and company-loss parameters.
    self.company_num = company_num
    self.random_need_mean = 10
    self.random_need_stderror = 5
    self.rate_sensitivity = 0.004
    self.overstock_rate = 0.5

    # Interest-rate band: the baseline rate lowered by floating_floor and
    # raised by floating_cap gives the bounds of every action component.
    self.baseline_rate = 0.0435
    self.commitment_rate = 0.005
    self.floating_cap = 0.3
    self.floating_floor = 0.2
    self.rate_floor = (1 - self.floating_floor) * self.baseline_rate
    self.rate_cap = (1 + self.floating_cap) * self.baseline_rate

    # Observation bounds, ordered as (needs, period, cost).
    self.needs_low = 0
    self.needs_high = 10000
    self.period_low = 0
    self.period_high = 20
    self.cost_low = 0
    self.cost_high = 10000
    # Build the bounds as float64 so they match the dtype of the Box below
    # and no implicit integer-to-float cast is needed.
    self.low = np.array(
        [self.needs_low, self.period_low, self.cost_low], dtype=np.float64
    )
    self.high = np.array(
        [self.needs_high, self.period_high, self.cost_high], dtype=np.float64
    )

    self.action_space = spaces.Box(
        low=self.rate_floor,
        high=self.rate_cap,
        shape=(self.company_num,),
        dtype=np.float64,
    )
    self.observation_space = spaces.Box(
        low=self.low, high=self.high, dtype=np.float64
    )
    self.seed()
def seed(self, seed=None):
    """Create the environment's random generator from ``seed``.

    Args:
        seed: Value forwarded to ``seeding.np_random``; ``None`` lets that
            helper choose one.

    Returns:
        A one-element list holding the seed reported by ``seeding.np_random``.
    """
    generator, used_seed = seeding.np_random(seed)
    self.np_random = generator
    return [used_seed]
def cost_period(self):
new_cost_period= np.floor((self.current_steps - 1) / 12 + 1 )
if new_cost_period != self.current_cost_period:
self.cost_nonconstant_variable=np.random.choice([0.000005,0.00001,0.00002])
print("成本非固定参数:",self.cost_nonconstant_variable)
self.current_cost_period=new_cost_period
print('当前成本阶段:',self.current_cost_period)
def get_cost_period(self):
return self.current_cost_period
def need_function(self,actual_needs):
expected_needs=self.state[0]
random_needs=np.random.normal(loc=self.random_need_mean,scale=self.random_need_stderror,size=self.company_num)
print("随机需求:",random_needs)
need_extend_rates=[np.random.choice([0,0.1,0.2,0.3,0.4,0.5]) for i in range(self.company_num)]
print("需求扩展率:",need_extend_rates)
expected_needs=self.overstock_rate * (expected_needs-actual_needs) + need_extend_rates * actual_needs + random_needs
expected_needs=np.clip(expected_needs,self.needs_low,self.needs_high)
print("下一阶段的需求:",expected_needs)
return np.array(expected_needs)
def get_state(self):
return np.array(self.state)
def company_loss(self,actual_needs,actions):
expected_needs=self.state[0]
company_losses=[self.rate_sensitivity * (expected_needs[i] - actual_needs[i]) ** 2 + actions[i] * actual_needs[i] + self.commitment_rate * expected_needs[i] for i in range(self.company_num)]
print("企业损失:",company_losses)
total_company_losses=np