LA 3720 highway (using a dynamic-programming idea to cut the complexity drastically, 9 ms)

Problem link

A solution approach for this problem can be found at: http://blog.csdn.net/incredible_bly/article/details/11821403
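For concreteness, here is a minimal per-query sketch of that kind of solution (illustrative only, not copied from the linked post; it uses the same +1 for gcd==1 / -1 for gcd==2 weighting and the final doubling that appear in the code at the end of this post):

#include <cstdio>

int gcd(int a, int b) { return b ? gcd(b, a % b) : a; }

int main() {
    int n, m;
    while (scanf("%d%d", &n, &m) == 2 && n + m) {
        n--, m--;                                   // work with edge lengths instead of point counts
        long long ans = 0;
        for (int x = 1; x <= m; x++)
            for (int y = 1; y <= n; y++) {
                int g = gcd(x, y);
                long long tmp = (long long)(m - x + 1) * (n - y + 1);   // placements of vector (x, y)
                if (g == 1) ans += tmp;             // primitive direction: counted
                else if (g == 2) ans -= tmp;        // gcd == 2 directions subtracted, as in the DP below
            }
        printf("%lld\n", 2 * ans);
    }
    return 0;
}

Each query runs a full O(n*m) double loop here, which is presumably where the roughly 500 ms mentioned below comes from.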

That is acceptable, but can we go even faster?

The computation involves the term tmp = (m-x+1)*(n-y+1), which at first glance only works for a fixed n*m grid and cannot be reused for other sizes.

Expand it: tmp = N*M - N*x - M*y + x*y, where N = n+1 and M = m+1.

This expansion is the key. N and M are just variables, so during the computation we only need to record the coefficient sums; that makes it easy to extend a result for an n*m grid to (n+1)*m or n*(m+1), with each transition costing only O(min{n,m}). Since the tables are memoized and shared across all test cases, the running time drops from roughly 500 ms to about 10 ms.
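As a quick sanity check of this expansion (purely illustrative), the snippet below compares the direct weighted sum of tmp = (m-x+1)*(n-y+1) with the value reassembled from the four coefficient sums; the two printed totals should match for any small n and m:

#include <cstdio>

int gcd(int a, int b) { return b ? gcd(b, a % b) : a; }

int main() {
    int n = 7, m = 11;                              // any small grid works for the check
    long long direct = 0, cnt = 0, sx = 0, sy = 0, sxy = 0;
    for (int x = 1; x <= m; x++)
        for (int y = 1; y <= n; y++) {
            int g = gcd(x, y);
            int w = (g == 1) - (g == 2);            // weight: +1, -1 or 0
            direct += (long long)w * (m - x + 1) * (n - y + 1);
            cnt += w;                               // sum of w       -> multiplies N*M
            sx  += (long long)w * x;                // sum of w*x     -> multiplies -N
            sy  += (long long)w * y;                // sum of w*y     -> multiplies -M
            sxy += (long long)w * x * y;            // sum of w*x*y   -> constant term
        }
    long long N = n + 1, M = m + 1;
    printf("%lld %lld\n", direct, cnt * N * M - N * sx - M * sy + sxy);
    return 0;
}

The full solution below keeps exactly these four sums per state (cnt, X, Y, dcnt in the code), so extending from n*m to (n+1)*m or n*(m+1) only requires adding the weights of the newly introduced vectors.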

Reference code:

#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
using namespace std;

const int maxn = 310;

int gcd(int a, int b) {
    int r;
    while (b) r = a % b, a = b, b = r;
    return a;
}

// State (n, m): all vectors (x, y) with 1 <= x <= m, 1 <= y <= n,
// each weighted +1 if gcd(x, y) == 1, -1 if gcd(x, y) == 2, 0 otherwise.
//   cnt[n][m]  : sum of weights        (multiplies (n+1)*(m+1) in the answer)
//   X[n][m]    : weighted sum of x     (multiplies -(n+1))
//   Y[n][m]    : weighted sum of y     (multiplies -(m+1))
//   dcnt[n][m] : weighted sum of x*y   (added as-is)
int vis[maxn][maxn], cnt[maxn][maxn], X[maxn][maxn], Y[maxn][maxn], dcnt[maxn][maxn];
int g[maxn][maxn];   // g[i][j] = gcd(i, j), precomputed

void DP(int n, int m) {
    if (vis[n][m]) return;
    int &c1 = cnt[n][m], &x1 = X[n][m], &y1 = Y[n][m], &d1 = dcnt[n][m];
    if (n == 0 || m == 0) {
        c1 = d1 = x1 = y1 = 0;
        vis[n][m] = 1;
        return;
    }
    if (n < m) {
        // Extend from (n, m-1): the newly introduced vectors are (m, y) for y = 1..n.
        DP(n, m - 1);
        c1 = cnt[n][m - 1], d1 = dcnt[n][m - 1], x1 = X[n][m - 1], y1 = Y[n][m - 1];
        for (int y = 1; y <= n; y++) {
            if (g[m][y] == 1)      c1++, d1 += m * y, x1 += m, y1 += y;
            else if (g[m][y] == 2) c1--, d1 -= m * y, x1 -= m, y1 -= y;
        }
    } else {
        // Extend from (n-1, m): the newly introduced vectors are (x, n) for x = 1..m.
        DP(n - 1, m);
        c1 = cnt[n - 1][m], d1 = dcnt[n - 1][m], x1 = X[n - 1][m], y1 = Y[n - 1][m];
        for (int x = 1; x <= m; x++) {
            if (g[x][n] == 1)      c1++, d1 += x * n, x1 += x, y1 += n;
            else if (g[x][n] == 2) c1--, d1 -= x * n, x1 -= x, y1 -= n;
        }
    }
    vis[n][m] = 1;
}

int main()
{
    for (int i = 0; i <= 300; i++)
        for (int j = 0; j <= 300; j++)
            g[i][j] = gcd(i, j);
    int n, m;
    while (scanf("%d%d", &n, &m) != EOF && n + m) {
        n--, m--;                  // work with edge lengths instead of point counts
        if (n > m) swap(n, m);     // keep n <= m so each transition loop is O(min{n, m})
        DP(n, m);
        // tmp = (m-x+1)*(n-y+1) expanded as N*M - N*x - M*y + x*y with N = n+1, M = m+1
        int ans = cnt[n][m] * (n + 1) * (m + 1) - (n + 1) * X[n][m] - (m + 1) * Y[n][m] + dcnt[n][m];
        ans *= 2;
        printf("%d\n", ans);
    }
    return 0;
}

