A piece of open-source code from OBS: what does this code do?

This is blake2-impl.h from the BLAKE2 reference implementation, a portability header bundled in OBS's source tree. It contains no hashing logic itself; instead it defines the BLAKE2_INLINE macro plus the low-level helpers the BLAKE2b/BLAKE2s compression functions build on: endian-neutral loads and stores for 16-, 32-, 48- and 64-bit words (load32, store32, and so on, with a memcpy fast path when NATIVE_LITTLE_ENDIAN is defined), the rotate-right primitives rotr32 and rotr64, and secure_zero_memory for wiping sensitive data.

```c
#ifndef BLAKE2_IMPL_H
#define BLAKE2_IMPL_H

#include <stdint.h>
#include <string.h>

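/* Pick an inline keyword the compiler understands: pre-C99 compilers get
   MSVC's __inline or GCC's __inline__ (or nothing for plain C89), while
   C99 and C++ compilers get the standard `inline`. */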
#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L)
  #if   defined(_MSC_VER)
    #define BLAKE2_INLINE __inline
  #elif defined(__GNUC__)
    #define BLAKE2_INLINE __inline__
  #else
    #define BLAKE2_INLINE
  #endif
#else
  #define BLAKE2_INLINE inline
#endif

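/* Read a 32-bit word stored little-endian at src (no alignment required).
   On known little-endian targets a plain memcpy suffices; otherwise the
   value is assembled byte by byte. */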
static BLAKE2_INLINE uint32_t load32( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  uint32_t w;
  memcpy(&w, src, sizeof w);
  return w;
#else
  const uint8_t *p = ( const uint8_t * )src;
  return (( uint32_t )( p[0] ) <<  0) |
         (( uint32_t )( p[1] ) <<  8) |
         (( uint32_t )( p[2] ) << 16) |
         (( uint32_t )( p[3] ) << 24) ;
#endif
}

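/* Read a 64-bit little-endian word; the byte-by-byte fallback also works
   on big-endian hosts. */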
static BLAKE2_INLINE uint64_t load64( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  uint64_t w;
  memcpy(&w, src, sizeof w);
  return w;
#else
  const uint8_t *p = ( const uint8_t * )src;
  return (( uint64_t )( p[0] ) <<  0) |
         (( uint64_t )( p[1] ) <<  8) |
         (( uint64_t )( p[2] ) << 16) |
         (( uint64_t )( p[3] ) << 24) |
         (( uint64_t )( p[4] ) << 32) |
         (( uint64_t )( p[5] ) << 40) |
         (( uint64_t )( p[6] ) << 48) |
         (( uint64_t )( p[7] ) << 56) ;
#endif
}

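/* Read a 16-bit little-endian word from src. */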
static BLAKE2_INLINE uint16_t load16( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  uint16_t w;
  memcpy(&w, src, sizeof w);
  return w;
#else
  const uint8_t *p = ( const uint8_t * )src;
  return (( uint16_t )( p[0] ) <<  0) |
         (( uint16_t )( p[1] ) <<  8) ;
#endif
}

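/* Write a 16-bit word to dst in little-endian byte order. */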
static BLAKE2_INLINE void store16( void *dst, uint16_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  memcpy(dst, &w, sizeof w);
#else
  uint8_t *p = ( uint8_t * )dst;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w;
#endif
}

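/* Write a 32-bit word to dst in little-endian byte order. */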
static BLAKE2_INLINE void store32( void *dst, uint32_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  memcpy(dst, &w, sizeof w);
#else
  uint8_t *p = ( uint8_t * )dst;
  p[0] = (uint8_t)(w >>  0);
  p[1] = (uint8_t)(w >>  8);
  p[2] = (uint8_t)(w >> 16);
  p[3] = (uint8_t)(w >> 24);
#endif
}

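/* Write a 64-bit word to dst in little-endian byte order. */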
static BLAKE2_INLINE void store64( void *dst, uint64_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  memcpy(dst, &w, sizeof w);
#else
  uint8_t *p = ( uint8_t * )dst;
  p[0] = (uint8_t)(w >>  0);
  p[1] = (uint8_t)(w >>  8);
  p[2] = (uint8_t)(w >> 16);
  p[3] = (uint8_t)(w >> 24);
  p[4] = (uint8_t)(w >> 32);
  p[5] = (uint8_t)(w >> 40);
  p[6] = (uint8_t)(w >> 48);
  p[7] = (uint8_t)(w >> 56);
#endif
}

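/* Read a 48-bit (6-byte) little-endian value; BLAKE2s stores its node
   offset in such a field. There is no native 48-bit type, hence no
   memcpy fast path. */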
static BLAKE2_INLINE uint64_t load48( const void *src )
{
  const uint8_t *p = ( const uint8_t * )src;
  return (( uint64_t )( p[0] ) <<  0) |
         (( uint64_t )( p[1] ) <<  8) |
         (( uint64_t )( p[2] ) << 16) |
         (( uint64_t )( p[3] ) << 24) |
         (( uint64_t )( p[4] ) << 32) |
         (( uint64_t )( p[5] ) << 40) ;
}

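/* Write the low 48 bits of w to dst in little-endian byte order. */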
static BLAKE2_INLINE void store48( void *dst, uint64_t w )
{
  uint8_t *p = ( uint8_t * )dst;
  p[0] = (uint8_t)(w >>  0);
  p[1] = (uint8_t)(w >>  8);
  p[2] = (uint8_t)(w >> 16);
  p[3] = (uint8_t)(w >> 24);
  p[4] = (uint8_t)(w >> 32);
  p[5] = (uint8_t)(w >> 40);
}

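/* Rotate w right by c bits; c must satisfy 0 < c < 32 or the left shift
   is undefined. BLAKE2s's G function uses c = 16, 12, 8 and 7. */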
static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c )
{
  return ( w >> c ) | ( w << ( 32 - c ) );
}

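/* 64-bit rotate right (0 < c < 64); BLAKE2b's G function calls this with
   c = 32, 24, 16 and 63. */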
static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c )
{
  return ( w >> c ) | ( w << ( 64 - c ) );
}

/* Call memset() through a volatile function pointer so the compiler cannot
   prove the call dead and optimize the zeroing away (the usual fate of a
   plain memset on a buffer that is about to go out of scope). */
static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n)
{
  static void *(*const volatile memset_v)(void *, int, size_t) = &memset;
  memset_v(v, 0, n);
}

#endif /* BLAKE2_IMPL_H */
```
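To see why these helpers matter, here is a minimal, hypothetical test driver (assuming the header above is saved as `blake2-impl.h`; the file name and `main` scaffolding are illustrative, not part of OBS). Because store64 and load64 fix the byte order at little-endian, it prints the same output on any platform, which is exactly what keeps BLAKE2 digests identical across architectures:

```c
#include <stdio.h>
#include <inttypes.h>
#include "blake2-impl.h"   /* hypothetical file name for the header above */

int main(void)
{
  uint8_t buf[8];
  uint64_t key = UINT64_C(0x0123456789abcdef);

  /* store64 always emits little-endian bytes, so buf[0] is 0xef on any host */
  store64(buf, key);
  printf("first byte : 0x%02x\n", buf[0]);

  /* load64 reverses the store, regardless of host endianness or alignment */
  printf("round trip : 0x%016" PRIx64 "\n", load64(buf));

  /* one of the four rotations BLAKE2b's G mixing function performs */
  printf("rotr64(,32): 0x%016" PRIx64 "\n", rotr64(key, 32));

  /* wipe the buffer; a plain memset here could legally be optimized away */
  secure_zero_memory(buf, sizeof buf);
  return 0;
}
```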
