数据结构
数据:NHWC
权重:OIHW
C:
/*
 * Direct (naive) 2-D convolution.
 *
 * input    - source blob, read through blobGet(n, h, w, c, blob).
 * output   - destination blob. Its n/h/w/c fields must already be filled in by
 *            the caller; this function allocates output->data (zero
 *            initialised) and the caller owns and must free it. Any previous
 *            value of output->data is overwritten without being freed.
 * convInfo - supplies strideH/strideW, convPad.padH/padW and kernelH/kernelW.
 * param    - weights, read through wtGet(oc, ky, kx, ic, param); param.n is
 *            used as the number of output channels.
 *
 * On allocation failure output->data is left NULL and the function returns
 * without computing anything.
 *
 * NOTE(review): output->h / output->w are not validated here. They are assumed
 * to be at least (padded extent - kernel) / stride + 1, otherwise the writes
 * in the convolution loop land outside output->data -- confirm with callers.
 */
void convOp(Blob input,Blob* output,conv_s convInfo, wtParam param){
    const int strideH = convInfo.strideH, strideW = convInfo.strideW;
    const int padH = convInfo.convPad.padH, padW = convInfo.convPad.padW;
    const int kernelH = convInfo.kernelH, kernelW = convInfo.kernelW;

    /* Scratch copy of the input surrounded by a zero border. */
    Blob padded;
    padded.n = input.n;
    padded.h = input.h + 2 * padH;
    padded.w = input.w + 2 * padW;
    padded.c = input.c;
    padded.dataFmt = input.dataFmt;
    padded.dataType = input.dataType;

    /* Count elements in size_t so the product is not computed in int. */
    const size_t paddedCount =
        (size_t)padded.n * padded.h * padded.w * padded.c;
    /* calloc provides the zero border, no separate memset needed. */
    padded.data = calloc(paddedCount, sizeof(float));
    if (padded.data == NULL && paddedCount != 0) {
        output->data = NULL;
        return;
    }

    /* Padding: copy the input into the interior of the scratch blob. */
    for (int n = 0; n < input.n; n++) {
        for (int c = 0; c < input.c; c++) {
            for (int h = 0; h < input.h; h++) {
                for (int w = 0; w < input.w; w++) {
                    blobSet(n, h + padH, w + padW, c, padded,
                            blobGet(n, h, w, c, input));
                }
            }
        }
    }

    const size_t outCount =
        (size_t)output->n * output->h * output->w * output->c;
    output->data = calloc(outCount, sizeof(float));
    if (output->data == NULL && outCount != 0) {
        free(padded.data);
        return;
    }

    /* Convolution: slide the kernel window over the padded blob. */
    for (int n = 0; n < padded.n; n++) {
        for (int oc = 0; oc < param.n; oc++) {
            for (int inH = 0; inH <= padded.h - kernelH; inH += strideH) {
                for (int inW = 0; inW <= padded.w - kernelW; inW += strideW) {
                    float sum = 0;
                    for (int ic = 0; ic < padded.c; ic++) {
                        for (int ky = 0; ky < kernelH; ky++) {
                            for (int kx = 0; kx < kernelW; kx++) {
                                sum += blobGet(n, inH + ky, inW + kx, ic, padded) *
                                       wtGet(oc, ky, kx, ic, param);
                            }
                        }
                    }
                    /* Window origin divided by the stride is the output index. */
                    blobSet(n, inH / strideH, inW / strideW, oc, *output, sum);
                }
            }
        }
    }

    /* The scratch buffer is private to this call; the original leaked it. */
    free(padded.data);
}
Python:
def convOp(inblob, node_size, node_shape, param):
    """Compute a direct 2-D convolution of an NHWC batch with OIHW weights.

    Args:
        inblob: Input array indexed as (N, H, W, C).
        node_size: Shape of the output array, (N, out_H, out_W, out_C). It is
            used as-is to allocate the result; it is not derived here.
        node_shape: Sequence where ``node_shape[2]`` is (stride_h, stride_w)
            and ``node_shape[3]`` is (pad_h, pad_w). The kernel extent is taken
            from the weight array itself, so ``node_shape[1]`` is not read.
        param: Sequence whose first element is the weight array in OIHW order.

    Returns:
        A float32 ``numpy.ndarray`` of shape ``node_size``.
    """
    weights = param[0]
    stride_h = int(node_shape[2][0])
    stride_w = int(node_shape[2][1])
    pad_h = int(node_shape[3][0])
    pad_w = int(node_shape[3][1])

    if pad_h != 0 or pad_w != 0:
        # Zero-pad the two spatial axes; batch and channel axes are untouched.
        in_h = inblob.shape[1]
        in_w = inblob.shape[2]
        padded = np.zeros(
            (inblob.shape[0], in_h + 2 * pad_h, in_w + 2 * pad_w, inblob.shape[3]),
            dtype=np.float32,
        )
        padded[:, pad_h:pad_h + in_h, pad_w:pad_w + in_w, :] = inblob
    else:
        padded = inblob

    outblob = np.zeros(tuple(node_size), dtype=np.float32)

    # Weights are OIHW while data is NHWC. Transposing the weights to OHWI lets
    # one kernel broadcast against an (N, kh, kw, C) window. np.transpose only
    # returns a view, so this costs almost nothing.
    weights = np.transpose(weights, [0, 2, 3, 1])
    kernel_h = weights.shape[1]
    kernel_w = weights.shape[2]

    for h in range(outblob.shape[1]):
        top = h * stride_h
        for w in range(outblob.shape[2]):
            left = w * stride_w
            # The receptive field is the same for every output channel.
            window = padded[:, top:top + kernel_h, left:left + kernel_w, :]
            for oc in range(weights.shape[0]):
                # Reduce over H, W and C only. Summing every axis would also
                # collapse the batch axis and mix samples together.
                outblob[:, h, w, oc] = np.sum(window * weights[oc], axis=(1, 2, 3))
    return outblob