一.序列数据
样本\序列 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 |
Sample_1 | A | B | E | C | D | E | F | G | A | B | C |
Sample_2 | B | A | B | C | D | E | F | G | A | B | C |
Sample_3 | A | B | C | D | E | F | G | A | B | C | D |
Sample_4 | A | D | C | D | E | F | G | A | B | C | M |
Sample_5 | N | A | B | C | D | E | F | G | A | B | C |
原维度:shape =(#sample,#feature)= (5,11)
reshape:shape = (#sample,#feature,seq_len,height_y) = (n,c,h,w) = (5,1,11,1) #序列数据:height_y = 1(固定);此处的 #feature 指通道数 c = 1,序列长度 seq_len = 11
二.代码注释
以下代码只提取了需要理解的部分,且为修改后的代码。
def get_windows(X, win_x, win_y, stride_x=1, stride_y=1, pad_x=0, pad_y=0):
    """
    Extract every sliding window of X, filling the window buffer in parallel.

    Arguments:
        X (ndarray): n x c x h x w
        win_x (int): window length along the h axis (axis 2 of X)
        win_y (int): window length along the w axis (axis 3 of X)
        stride_x (int): step between consecutive windows along the h axis
        stride_y (int): step between consecutive windows along the w axis
        pad_x, pad_y: accepted for interface compatibility; this function
            does not use them
    Return:
        X_win (ndarray): n x nh x nw x nc, dtype float32, where
            nh = (h - win_x) // stride_x + 1
            nw = (w - win_y) // stride_y + 1
            nc = win_y * win_x * c
    """
    n, c, h, w = X.shape
    nc = win_y * win_x * c
    # Floor division: nh and nw are used as array dimensions and slice bounds,
    # so they must stay integers (true division yields floats on Python 3).
    nh = (h - win_x) // stride_x + 1
    nw = (w - win_y) // stride_y + 1
    X_win = np.empty((nc, n * nh * nw), dtype=np.float32)
    # get_windows_channel writes row des_id of X_win in place, so the workers
    # must share this process's memory; hence the thread-based backend.
    Parallel(n_jobs=-1, backend="threading")(
        delayed(get_windows_channel)(
            X, X_win, des_id, nw, nh, win_x, win_y, stride_x, stride_y)
        for des_id in range(nc))
    # Each row of X_win is ordered (sample, window row, window column).
    # Move the nc axis last before reshaping; reshaping the (nc, n*nh*nw)
    # buffer directly would interleave values from different windows.
    X_win = X_win.transpose((1, 0)).reshape((n, nh, nw, nc))
    return X_win
X_win.shape = (nc,n*nh*nw) → (n,nh,nw,nc) 即
#windows/per sample(见上图) :
#在x轴方向上窗口移动后的样本的向量长度 / #pooled_feature_x: nh = ⌊(h - win_x) / stride_x⌋ + 1
#在y轴方向上窗口移动后的样本的向量长度 / #pooled_feature_y: nw = ⌊(w - win_y) / stride_y⌋ + 1 = 1(序列数据:w = 1 , win_y = 1 , stride_y = 1)
注意:其中的Parallel()(delayed(get_windows_channel)...)代码,是指在此时要运行如下代码:
def get_windows_channel(X, X_win, des_id, nw, nh, win_x, win_y, stride_x, stride_y):
    """
    Fill row des_id of X_win with the values at one in-window offset.

    X: N x C x H x W
    X_win: nc x (N * nh * nw) with nc = C * win_y * win_x; row des_id is
        overwritten in place
    des_id: flat index of (k, di, dj) in range(C, win_y, win_x), i.e.
        des_id = (k * win_y + di) * win_x + dj
    nw, nh: number of window positions along the W and H axes
    win_x, stride_x: window length and step along the H axis
    win_y, stride_y: window length and step along the W axis
    Return:
        None; the result is written into X_win.
    """
    # Decode des_id with integer arithmetic: k, di and dj are used as array
    # indices, and true division would turn them into floats on Python 3.
    rest, dj = divmod(des_id, win_x)
    k, di = divmod(rest, win_y)
    # Strided slices pick, for every window position, the element that sits
    # at offset (dj, di) inside that window of channel k.
    src = X[:, k,
            dj:dj + nh * stride_x:stride_x,
            di:di + nw * stride_y:stride_y].ravel()
    np.copyto(X_win[des_id, :], src)
实际上就是:
for des_id in range(c * win_x * win_y) = range(nc):
for k in range(c) = range(#feature) = range(1) = 0:
for di in range(win_y):
for dj in range(win_x):
des.shape = (n*nh*nw)
src.shape = (n*nh*nw) #切片 dj:dj+nh*stride_x:stride_x 取出 nh 个元素,切片 di:di+nw*stride_y:stride_y 取出 nw 个元素,ravel() 后共 n*nh*nw 个元素;双冒号为切片(start:end:step)
src和des的维度应该是一致的。