工程简介
使用ARM汇编语言,计算两个二维数组A和B的卷积,通过软件仿真验证。其中A为6×6矩阵:
$$
A=\begin{bmatrix}
0\mathrm{x}23 & 0\mathrm{x}25 & 0\mathrm{x}27 & 0\mathrm{x}85 & 0\mathrm{x}86 & 0\mathrm{x}87\\
0\mathrm{x}33 & 0\mathrm{x}35 & 0\mathrm{x}35 & 0\mathrm{x}95 & 0\mathrm{x}95 & 0\mathrm{x}98\\
0\mathrm{x}44 & 0\mathrm{x}45 & 0\mathrm{x}44 & 0\mathrm{x}A5 & 0\mathrm{x}A6 & 0\mathrm{x}A7\\
0\mathrm{x}D5 & 0\mathrm{x}D6 & 0\mathrm{x}D7 & 0\mathrm{x}68 & 0\mathrm{x}69 & 0\mathrm{x}7A\\
0\mathrm{x}FD & 0\mathrm{x}FF & 0\mathrm{x}FE & 0\mathrm{x}42 & 0\mathrm{x}43 & 0\mathrm{x}43\\
0\mathrm{x}EA & 0\mathrm{x}EB & 0\mathrm{x}EC & 0\mathrm{x}55 & 0\mathrm{x}56 & 0\mathrm{x}56
\end{bmatrix}
$$
卷积核B为3×3矩阵:
$$
B=\begin{bmatrix}
1 & 2 & 1\\
0 & 0 & 0\\
-1 & -2 & -1
\end{bmatrix}
$$
卷积步长为1,padding为0,输出的矩阵为4×4大小
编译环境:Keil 5.37,芯片为Samsung S3C2410A,ARM核;编译器为Arm Compiler V5.06,使用Simulator软件仿真。
⚠️ 注意:上述矩阵尺寸可以在代码中自行修改,算法是相同的。
⚠️ 注意:使用Keil 5.37及以上版本安装包不自带Arm Compiler V5,需要手动安装。具体方法:下载旧版安装包,提取里面的V5编译器文件夹至当前Keil安装路径下,并在Keil File Extensions内手动添加。
源代码
C代码
C代码与汇编代码算法对应,编译环境:gcc;汇编结果可用C验证。
#include <stdio.h>
#include <stdint.h>
/*
 * Reference implementation of the 2-D convolution used to check the
 * assembly version: a 6x6 input A is combined with a 3x3 kernel
 * (stride 1, no padding), giving a 4x4 result.
 *
 * The result is printed row by row, each element as a signed decimal
 * value followed by its 32-bit two's-complement hex form, so it can be
 * compared word-for-word against a memory dump of the assembly output.
 *
 * Returns 0.
 */
int main(void) {
    /* Matrix dimensions; the output size follows from the other two. */
    enum {
        DATA_SIZE   = 6,
        KERNEL_SIZE = 3,
        OUTPUT_SIZE = DATA_SIZE - KERNEL_SIZE + 1
    };

    static const uint16_t A[DATA_SIZE][DATA_SIZE] = {
        {0x23, 0x25, 0x27, 0x85, 0x86, 0x87},
        {0x33, 0x35, 0x35, 0x95, 0x95, 0x98},
        {0x44, 0x45, 0x44, 0xA5, 0xA6, 0xA7},
        {0xD5, 0xD6, 0xD7, 0x68, 0x69, 0x7A},
        {0xFD, 0xFF, 0xFE, 0x42, 0x43, 0x43},
        {0xEA, 0xEB, 0xEC, 0x55, 0x56, 0x56}
    };
    static const int16_t kernel[KERNEL_SIZE][KERNEL_SIZE] = {
        { 1,  2,  1},
        { 0,  0,  0},
        {-1, -2, -1}
    };
    /* 32-bit accumulators: the products are summed at int width and never
     * narrowed back to 16 bits, so larger inputs cannot be truncated. */
    int32_t c[OUTPUT_SIZE][OUTPUT_SIZE] = {{0}};
    int i, j, k, l;

    for (i = 0; i < OUTPUT_SIZE; i++) {
        for (j = 0; j < OUTPUT_SIZE; j++) {
            int32_t sum = 0;
            for (k = 0; k < KERNEL_SIZE; k++) {
                for (l = 0; l < KERNEL_SIZE; l++) {
                    sum += (int32_t)kernel[k][l] * (int32_t)A[i + k][j + l];
                }
            }
            c[i][j] = sum;
        }
    }

    /* Emit the result; without this the computation has no observable effect. */
    for (i = 0; i < OUTPUT_SIZE; i++) {
        for (j = 0; j < OUTPUT_SIZE; j++) {
            printf("%6d (0x%08X) ", (int)c[i][j], (unsigned int)c[i][j]);
        }
        printf("\n");
    }
    return 0;
}
汇编源代码
使用简单直接的汇编代码计算卷积,算法对应上述C语言代码。`r4`、`r5`、`r6`、`r7` 作为 `for` 循环索引,`r8`、`r9`、`r10` 分别保存二维矩阵数据基地址、卷积核数据基地址、输出数据基地址。其余寄存器用到 `r11`、`r12` 及堆栈(栈顶指针 `r13` 需要初始化赋值)。
⚠️ 注意:`DCD` 按字(32位)分配并对齐存储,因此每个矩阵元素占4字节。
;-----------------------------------------------------------------------
; 2-D convolution demo (ARM state, armasm syntax)
;
;   OUTPUT[i][j] = sum over k,l of KERNEL[k][l] * DATA[i+k][j+l]
;   stride 1, no padding; the kernel is applied as written (not flipped).
;   All three matrices are row-major arrays of 32-bit words.
;
; Register roles
;   r4 = i (output row)       r5 = j (output column)
;   r6 = k (kernel row)       r7 = l (kernel column)
;   r8 = &DATA   r9 = &KERNEL   r10 = &OUTPUT
;   r11, r12 = scratch; intermediate values are parked on the stack
;   r13 = stack pointer
;
; Notes
;   * Every MUL is written with Rd different from Rm, which older ARM
;     cores require.
;   * OUTPUT is written by the program although it lives in this
;     CODE/READONLY area, so the simulator must map the region writable.
;   * There are exactly 52 instructions (0xD0 bytes) ahead of DATA, which
;     puts OUTPUT at offset 0x184 from START.  Adding or removing
;     instructions moves OUTPUT.
;-----------------------------------------------------------------------
DATA_SIZE       EQU     6                               ; DATA is DATA_SIZE x DATA_SIZE
KERNEL_SIZE     EQU     3                               ; KERNEL is KERNEL_SIZE x KERNEL_SIZE
OUTPUT_SIZE     EQU     (DATA_SIZE-KERNEL_SIZE+1)       ; OUTPUT is OUTPUT_SIZE x OUTPUT_SIZE
STACK_TOP       EQU     2000                            ; initial stack pointer value

                AREA    DEMO, CODE, READONLY
                ENTRY
START
                MOV     r4, #0                  ; i = 0
                MOV     r13, #STACK_TOP         ; set the stack pointer; PUSH moves it downwards
                LDR     r8, =DATA               ; r8  = &DATA
                LDR     r9, =KERNEL             ; r9  = &KERNEL
                LDR     r10, =OUTPUT            ; r10 = &OUTPUT

FOR_ROW
                MOV     r5, #0                  ; j = 0

FOR_COLUMN
                ; OUTPUT[i][j] = 0, so the result does not depend on what
                ; the cell held before this run.
                MOV     r11, #OUTPUT_SIZE
                MUL     r12, r4, r11            ; r12 = i * OUTPUT_SIZE
                ADD     r12, r12, r5            ; r12 = i * OUTPUT_SIZE + j
                MOV     r11, #0
                STR     r11, [r10, r12, LSL #2] ; word index -> byte offset
                MOV     r6, #0                  ; k = 0

GET_KERNEL_ROW
                MOV     r7, #0                  ; l = 0

GET_SUM
                ; r11 = DATA[i + k][j + l]
                ADD     r11, r4, r6             ; r11 = i + k
                MOV     r12, #DATA_SIZE
                MUL     r11, r12, r11           ; r11 = (i + k) * DATA_SIZE
                ADD     r11, r11, r5            ;       + j
                ADD     r11, r11, r7            ;       + l
                LDR     r11, [r8, r11, LSL #2]
                PUSH    {r11}                   ; park the data word

                ; r12 = KERNEL[k][l] * data
                MOV     r12, #KERNEL_SIZE
                MUL     r12, r6, r12            ; r12 = k * KERNEL_SIZE
                ADD     r12, r12, r7            ;       + l
                LDR     r12, [r9, r12, LSL #2]
                POP     {r11}                   ; r11 = data word
                MUL     r12, r11, r12           ; r12 = data * kernel
                PUSH    {r12}                   ; park the product

                ; r12 = OUTPUT[i][j] + product
                MOV     r11, #OUTPUT_SIZE
                MUL     r12, r4, r11            ; r12 = i * OUTPUT_SIZE
                ADD     r12, r12, r5            ;       + j
                LDR     r11, [r10, r12, LSL #2] ; r11 = running sum
                POP     {r12}                   ; r12 = product
                ADD     r12, r11, r12
                PUSH    {r12}                   ; park the new sum

                ; OUTPUT[i][j] = new sum (index rebuilt: r11/r12 were reused)
                MOV     r11, #OUTPUT_SIZE
                MUL     r12, r4, r11
                ADD     r12, r12, r5
                POP     {r11}                   ; r11 = new sum
                STR     r11, [r10, r12, LSL #2]

                ADD     r7, r7, #1              ; next kernel column
                CMP     r7, #KERNEL_SIZE
                BLT     GET_SUM

                ADD     r6, r6, #1              ; next kernel row
                CMP     r6, #KERNEL_SIZE
                BLT     GET_KERNEL_ROW

                ADD     r5, r5, #1              ; next output column
                CMP     r5, #OUTPUT_SIZE
                BLT     FOR_COLUMN

                ADD     r4, r4, #1              ; next output row
                CMP     r4, #OUTPUT_SIZE
                BLT     FOR_ROW

STOP            B       STOP                    ; finished: spin here instead of running on into DATA

; Input matrix: DATA_SIZE rows of DATA_SIZE words each.
DATA            DCD     0x23, 0x25, 0x27, 0x85, 0x86, 0x87
                DCD     0x33, 0x35, 0x35, 0x95, 0x95, 0x98
                DCD     0x44, 0x45, 0x44, 0xA5, 0xA6, 0xA7
                DCD     0xD5, 0xD6, 0xD7, 0x68, 0x69, 0x7A
                DCD     0xFD, 0xFF, 0xFE, 0x42, 0x43, 0x43
                DCD     0xEA, 0xEB, 0xEC, 0x55, 0x56, 0x56

; Kernel: KERNEL_SIZE rows of KERNEL_SIZE words each.
KERNEL          DCD     1, 2, 1
                DCD     0, 0, 0
                DCD     -1, -2, -1

; Result: OUTPUT_SIZE x OUTPUT_SIZE words (4 bytes each), zero-filled at load.
OUTPUT          SPACE   OUTPUT_SIZE*OUTPUT_SIZE*4

                END
调试结果
使用软件仿真可能需要给对应地址读写、执行的权限,可在“Initialization File”内手动添加 `map.ini`,形如:
map 0x0000, 0xFFFFF exec read write
查看输出数据:在 Memory1 窗口下设置观察地址 0x184(即 `OUTPUT` 标号所在地址)处的数据,可用前述C语言代码的计算结果进行比对。