模型量化压缩(fp32->fp16)

1 fp32_2_fp16

#ifndef __COMPILIER_FP16_H__
#define __COMPILIER_FP16_H__

#ifdef MACOS

#else
#ifdef __cplusplus
extern "C" {
#endif

#if defined __ARM_ARCH || defined __riscv

#define fp16_to_fp32(data) \
    ({                     \
        float f = data;    \
        f;                 \
    })

#define fp32_to_fp16(data) \
    ({                     \
        __fp16 f = data;   \
        f;                 \
    })

#else
#ifdef _MSC_VER
#pragma pack(push, 1)
struct fp16_pack
{
    unsigned short frac : 10;
    unsigned char exp : 5;
    unsigned char sign : 1;
};

struct fp32_pack
{
    unsigned int frac : 23;
    unsigned char exp : 8;
    unsigned char sign : 1;
};
#pragma pack(pop)
#else
struct fp16_pack
{
    unsigned short frac : 10;
    unsigned char exp : 5;
    unsigned char sign : 1;
} __attribute__((packed));

struct fp32_pack
{
    unsigned int frac : 23;
    unsigned char exp : 8;
    unsigned char sign : 1;
} __attribute__((packed));
#endif

typedef struct fp16_pack __fp16;

static inline float fp16_to_fp32(__fp16 data)
{
    float f;
    struct fp32_pack* fp32 = (struct fp32_pack*)&f;
    struct fp16_pack* fp16 = &data;

    int exp = fp16->exp;

    if (exp == 31 && fp16->frac != 0)
    {
        // return __builtin_inf()-__builtin_inf();
        fp32->sign = fp16->sign;
        fp32->exp = 255;
        fp32->frac = 1;

        return f;
    }

    if (exp == 31)
        exp = 255;
    if (exp == 0)
        exp = 0;
    else
        exp = (exp - 15) + 127;

    fp32->exp = exp;
    fp32->sign = fp16->sign;
    fp32->frac = ((int)fp16->frac) << 13;

    return f;
}

static inline __fp16 fp32_to_fp16(float data)
{
    struct fp32_pack* fp32 = (struct fp32_pack*)&data;
    struct fp16_pack fp16;

    int exp = fp32->exp;

    if (fp32->exp == 255 && fp32->frac != 0)
    {
        // NaN
        fp16.exp = 31;
        fp16.frac = 1;
        fp16.sign = fp32->sign;

        return fp16;
    }

    if ((exp - 127) < -14)
        exp = 0;
    else if ((exp - 127) > 15)
        exp = 31;
    else
        exp = exp - 127 + 15;

    fp16.exp = exp;
    fp16.frac = fp32->frac >> 13;
    fp16.sign = fp32->sign;

    return fp16;
}
#endif

#endif

#ifdef __cplusplus
}
#endif
#endif
  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值