s = A·B = a1·b1 + a2·b2 + a3·b3 + ... + an·bn:
; Dot product of the word vectors A and B, accumulated in r4.
;
; Register roles:
;   r0      pointer to the next element of A
;   r1      pointer to the next element of B
;   r2, r3  the current pair of elements (Ai, Bi)
;   r4      running sum s
;   r5      number of element pairs still to process
;
; The counter is tested at the bottom of the loop, so the body always
; executes at least once: the count placed in r5 must be non-zero.
; A and B are defined outside this fragment; the 4-byte stride and the
; LDR loads treat them as arrays of 32-bit words.
        MOV     r4, #0              ; s = 0
        MOV     r5, #24             ; n = 24 element pairs in this example
        ADR     r0, A               ; r0 -> first element of A (PC-relative)
        ADR     r1, B               ; r1 -> first element of B (PC-relative)
Next
        LDR     r2, [r0], #4        ; r2 = Ai, then r0 += 4 (post-indexed)
        LDR     r3, [r1], #4        ; r3 = Bi, then r1 += 4 (post-indexed)
        MLA     r4, r2, r3, r4      ; s = Ai * Bi + s (multiply-accumulate)
        SUBS    r5, r5, #1          ; one pair done; Z flag set when r5 reaches 0
        BNE     Next                ; more pairs left -> go round again