先定义一个对四个值取最大值的函数 maxfour(基于两数取最大值的宏 max):
/* Two-argument maximum.
 * Every argument and the whole expansion are parenthesized so that expression
 * arguments (e.g. `x & y` or `c ? p : q`) bind as intended.
 * Each argument may still be evaluated more than once, so do not pass
 * expressions with side effects. */
#define max(a,b) (((a)>(b))?(a):(b))
/* Returns the largest of the four values a, b, c and d. */
Dtype_16f maxfour(Dtype_16f a,Dtype_16f b,Dtype_16f c,Dtype_16f d){
    /* Reduce pairwise first, then compare the two partial winners. */
    Dtype_16f left = max(a,b);
    Dtype_16f right = max(c,d);
    return max(left,right);
}
这个函数只是做简单的数据复制(memcpy)。把复制封装成函数之后可以并行执行;如果不封装,从分析结果来看似乎不会并行,具体原因我也不太清楚。
/*
 * Copies Tw consecutive elements from In_ddr, starting at element offset
 * `of`, into the beginning of pool_bus1.
 *
 * In_ddr     source buffer
 * pool_bus1  destination row buffer (declared with Tww elements)
 * Tw         number of elements to copy
 * of         element offset into In_ddr where the copy starts
 */
void cpy_poolbuf(Dtype_16f* In_ddr,Dtype_16f pool_bus1[Tww],INT8 Tw,int of){
    Dtype_16f *src = In_ddr + of;
    memcpy(pool_bus1, src, Tw*sizeof(Dtype_16f));
}
Add 函数:把两路输入逐元素相加,结果写入 pool_in。
/*
 * Element-wise sum of two input buffers over one tile, written to pool_in.
 *
 * For every channel of the current group and every tile row, one row of Tw
 * elements is fetched from each input at the same element offset
 *     (cz + channel) * winxhin + (hz + row) * win + wz
 * and the two rows are added element by element into pool_in[channel][row][*].
 *
 * In_ddr, In_ddr2  the two source buffers (read at identical offsets)
 * pool_in          destination tile, indexed [channel][row][column]
 * Th, Tw           number of rows / columns processed in this tile
 * hz, wz, cz       starting row / column / channel of the tile in the source
 * win              row stride of the source, in elements
 * chin             total channel count; the group is clamped so that
 *                  cz + channel never reaches chin
 * winxhin          channel stride of the source, in elements
 *                  (presumably win * hin -- confirm against the caller)
 */
void Add(Dtype_16f* In_ddr,Dtype_16f* In_ddr2,Dtype_16f pool_in[inchannel][Thh][Tww],
        INT8 Th,INT8 Tw, Dtype_11f hz,
        Dtype_11f wz,Dtype_11f cz,Dtype_11f win,Dtype_11f chin, Dtype_16f winxhin){
    /* Row staging buffers, one per input. */
    static Dtype_16f row_a[Tww];
    static Dtype_16f row_b[Tww];
    INT8 ch,row,col;
    int src_off;

    /* The last channel group may hold fewer than inchannel channels. */
    Dtype_11f n_ch=inchannel;
    if(cz+inchannel>chin)
        n_ch=chin-cz;

    for(ch=0;ch<n_ch;ch++){
        Dtype_11f C=ch+cz;
        for(row=0;row<Th;row++){
            Dtype_11f H=row+hz;
            src_off=C*winxhin+H*win+wz;

            /* Both fetches go through the helper rather than an inline memcpy. */
            cpy_poolbuf(In_ddr, row_a, Tw, src_off);
            cpy_poolbuf(In_ddr2, row_b, Tw, src_off);

            for(col=0;col<Tw;col++){
                pool_in[ch][row][col]=row_a[col]+row_b[col];
            }
        }
    }
}
这个函数把 Add 和池化(pool)合并在一起实现,主要是因为在我的网络 YOLO-ghostnet(https://kns.cnki.net/kcms/detail/detail.aspx?dbcode=CAPJ&dbname=CAPJLAST&filename=JSJC20210318000&v=g9BjGJf5ZLX%25mmd2FERDnZ0yhaQE3qlTNYNvbEOMhXkvxwGY7cDtv9Pc4mCD4L1Cogb0u)的结构中,add 后面总是紧跟着 pool。
/*
 * Fused element-wise add + 2x2 max pooling (stride 2) over one tile.
 *
 * First calls Add() to fill pool_in with In_ddr + In_ddr2 for the tile, then
 * takes the maximum of every 2x2 window of pool_in and writes each pooled row
 * to pool_outddr with memcpy.
 *
 * In_ddr, In_ddr2  the two source buffers that are summed
 * pool_outddr      destination buffer for the pooled result
 * pool_in          scratch tile holding the sum, indexed [channel][row][column]
 * Th, Tw           input tile rows / columns; Th>>1 rows of Tw>>1 elements
 *                  are produced
 * hz, wz, cz       starting row / column / channel of the tile in the input;
 *                  the output position uses hz>>1 and wz>>1
 * win              input row stride; the output row stride is win>>1
 * chin             total channel count, used to clamp the channel group
 * winxhin          input channel stride, forwarded to Add()
 *
 * NOTE(review): the output channel stride is computed as Wout*Wout, whereas
 * the input side uses the separate winxhin parameter. The two agree only if
 * the output plane is square -- confirm this holds for all callers.
 */
void pool(Dtype_16f* In_ddr,Dtype_16f* In_ddr2,Dtype_16f* pool_outddr,Dtype_16f pool_in[inchannel][Thh][Tww],
        INT8 Th,INT8 Tw, Dtype_11f hz,
        Dtype_11f wz,Dtype_11f cz,Dtype_11f win,Dtype_11f chin, Dtype_16f winxhin){
    /* One pooled output row, staged before it is copied out. */
    static Dtype_16f out_row[tww];
    INT8 cc,hh,ww;

    /* Output geometry: every dimension and origin is halved. */
    Dtype_11f Wout=win>>1;
    INT8 th=Th>>1;
    INT8 tw=Tw>>1;
    Dtype_11f hzz=hz>>1;
    Dtype_11f wzz=wz>>1;

    /* The last channel group may hold fewer than inchannel channels. */
    Dtype_11f inchan=inchannel;
    if(cz+inchannel>chin)
        inchan=chin-cz;

    /* Stage 1: pool_in = In_ddr + In_ddr2 for this tile. */
    Add(In_ddr,In_ddr2, pool_in,Th, Tw, hz,wz, cz, win, chin, winxhin);

    /* Stage 2: 2x2 max pooling, one output row at a time. */
    for(cc=0;cc<inchan;cc++){
        Dtype_11f cof=cz+cc;
        for(hh=0;hh<th;hh++){
            INT8 h2=hh<<1;
            Dtype_11f hof=hzz+hh;

            for(ww=0;ww<tw;ww++){
//#pragma HLS PIPELINE
                INT8 w2=ww<<1;
                Dtype_16f top_left=pool_in[cc][h2][w2];
                Dtype_16f bottom_left=pool_in[cc][h2+1][w2];
                Dtype_16f top_right=pool_in[cc][h2][w2+1];
                Dtype_16f bottom_right=pool_in[cc][h2+1][w2+1];
                out_row[ww]=maxfour(top_left,bottom_left,top_right,bottom_right);
            }

            /* Element offset of this row in the output buffer. */
            int off=cof*Wout*Wout+hof*Wout+wzz;
            memcpy((Dtype_16f *)(pool_outddr + off),out_row,tw*sizeof(Dtype_16f));
        }
    }
}