4. ArchitectureLab
Part A
Function sum_list
Code:
# Program entry, placed at address 0: set up the stack, run main, then halt.
.pos 0
irmovq stack, %rsp # point %rsp at the stack label before the first call
call main
halt # reached once main returns
# Sample linked list: three nodes, each a value quad followed by a quad
# holding the address of the next node (0 terminates the list).
.align 8
ele1:
.quad 0x00a
.quad ele2
ele2:
.quad 0x0b0
.quad ele3
ele3:
.quad 0xc00
.quad 0
# main: pass the address of the first list node in %rdi and call sum_list.
# Whatever sum_list leaves in %rax is not modified here.
main:
irmovq ele1, %rdi # rdi = ls (head of the sample list)
call sum_list
ret
# sum_list: iteratively add up the val field of every node in a linked list.
#   In:       %rdi = ls (address of the first node, 0 for an empty list)
#   Out:      %rax = sum of all val fields
#   Clobbers: %rcx, %rdx, %rdi, %r8, %r9, condition codes
sum_list:
        irmovq  $0x8, %r8           # r8  = 8, byte offset of the next field
        irmovq  $0x0, %rdx          # rdx = 0, the value ls is compared against
        irmovq  $0x0, %rax          # val = 0
        jmp     check
walk:
        mrmovq  (%rdi), %r9         # r9 = ls->val
        addq    %r8, %rdi           # rdi = address of ls->next
        addq    %r9, %rax           # val += ls->val
        mrmovq  (%rdi), %rdi        # ls = ls->next
check:
        rrmovq  %rdi, %rcx          # compare on a copy so ls is left intact
        subq    %rdx, %rcx          # Z is set exactly when ls == 0
        jne     walk                # keep going while ls != 0
done:
        ret                         # return val
# Stack region: %rsp is initialised to this address, and calls/pushes store
# into the words below it.
.pos 0x200
stack:
Running Result:
myarch% ./yas sum.ys
myarch% ./yis sum.yo
Stopped in 35 steps at PC = 0x13. Status 'HLT', CC Z=1 S=0 O=0
Changes to registers:
%rax: 0x0000000000000000 0x0000000000000cba
%rsp: 0x0000000000000000 0x0000000000000200
%r8: 0x0000000000000000 0x0000000000000008
%r9: 0x0000000000000000 0x0000000000000c00
Changes to memory:
0x01f0: 0x0000000000000000 0x000000000000005b
0x01f8: 0x0000000000000000 0x0000000000000013
Function rsum_list
Code:
# Program entry, placed at address 0: set up the stack, run main, then halt.
.pos 0
irmovq stack, %rsp # point %rsp at the stack label before the first call
call main
halt # reached once main returns
# Sample linked list: three nodes, each a value quad followed by a quad
# holding the address of the next node (0 terminates the list).
.align 8
ele1:
.quad 0x00a
.quad ele2
ele2:
.quad 0x0b0
.quad ele3
ele3:
.quad 0xc00
.quad 0
# main: pass the address of the first list node in %rdi and call rsum_list.
# Whatever rsum_list leaves in %rax is not modified here.
main:
irmovq ele1, %rdi # rdi = ls (head of the sample list)
call rsum_list
ret
# rsum_list: recursively add up the val field of every node in a linked list.
#   In:       %rdi = ls (address of the first node, 0 for an empty list)
#   Out:      %rax = sum of all val fields (0 for an empty list)
#   Clobbers: %rcx, %rsi, %rdi, %r8, %r9, condition codes
#   Stack:    16 bytes per node visited (saved val plus return address)
rsum_list:
        irmovq  $0x0, %rsi          # rsi = 0, the value ls is compared against
        irmovq  $0x8, %r8           # r8  = 8, byte offset of the next field
        rrmovq  %rdi, %r9           # compare on a copy so ls is left intact
        subq    %rsi, %r9           # Z is set exactly when ls == 0
        jne     recurse
        irmovq  $0, %rax            # empty list: return 0
        ret
recurse:
        mrmovq  (%rdi), %rax        # val = ls->val
        addq    %r8, %rdi           # rdi = address of ls->next
        pushq   %rax                # keep val alive across the recursive call
        mrmovq  (%rdi), %rdi        # rdi = ls->next
        call    rsum_list           # rax = rest = rsum_list(ls->next)
        popq    %rcx                # rcx = saved val
        addq    %rcx, %rax          # return val + rest
        ret
# Stack region: %rsp is initialised to this address, and calls/pushes store
# into the words below it.
.pos 0x200
stack:
Running Result:
myarch% ./yas rsum.ys
myarch% ./yis rsum.yo
Stopped in 52 steps at PC = 0x13. Status 'HLT', CC Z=0 S=0 O=0
Changes to registers:
%rax: 0x0000000000000000 0x0000000000000cba
%rcx: 0x0000000000000000 0x000000000000000a
%rsp: 0x0000000000000000 0x0000000000000200
%r8: 0x0000000000000000 0x0000000000000008
Changes to memory:
0x01c0: 0x0000000000000000 0x00000000000000a9
0x01c8: 0x0000000000000000 0x0000000000000c00
0x01d0: 0x0000000000000000 0x00000000000000a9
0x01d8: 0x0000000000000000 0x00000000000000b0
0x01e0: 0x0000000000000000 0x00000000000000a9
0x01e8: 0x0000000000000000 0x000000000000000a
0x01f0: 0x0000000000000000 0x000000000000005b
0x01f8: 0x0000000000000000 0x0000000000000013
Function copy_block
Code:
# Program entry, placed at address 0: set up the stack, run main, then halt.
.pos 0
irmovq stack, %rsp # point %rsp at the stack label before the first call
call main
halt # reached once main returns
# Source block: three consecutive quads (a flat array, not a linked list).
# main passes ele1 as the start of the block; ele2 and ele3 simply label
# the second and third words.
.align 8
ele1:
.quad 0x00a
ele2:
.quad 0x0b0
ele3:
.quad 0xc00
# main: reserve 0x40 bytes of stack as the destination buffer, call
# copy_block with src = ele1, dest = that buffer and len = 3, then release
# the buffer. copy_block's result in %rax is not modified here.
main:
irmovq ele1, %rdi # NOTE(review): the same load is repeated three instructions below; one of the two is redundant
irmovq $0x40, %r8 # r8 = size in bytes of the destination buffer
subq %r8, %rsp # carve the buffer out of the stack
irmovq ele1, %rdi # rdi: src
rrmovq %rsp, %rsi # rsi: dest (lowest address of the buffer)
irmovq $0x3, %rcx # rcx: len, counted in quads
pushq %r8 # copy_block overwrites %r8, so save the buffer size
call copy_block
popq %r8 # restore the buffer size
addq %r8, %rsp # release the buffer
ret
# copy_block: copy len quads from src to dest and return the XOR of every
# value copied.
#   In:       %rdi = src, %rsi = dest, %rcx = len (count of 8-byte words)
#   Out:      %rax = XOR of the copied words; 0 when len <= 0
#   Clobbers: %rcx, %rdx, %rsi, %rdi, %r8, %r9, %r10, condition codes
# Note: len is taken from %rcx rather than %rdx; %rdx is the scratch
# register that holds each word in flight.
copy_block:
        irmovq  $0x0, %rax          # result = 0
        irmovq  $0x8, %r8           # r8  = 8, size of one word
        irmovq  $0x0, %r9           # r9  = 0, used to test len
        irmovq  $0x1, %r10          # r10 = 1, loop decrement
        jmp     test
loop:
        mrmovq  (%rdi), %rdx        # val = *src
        rmmovq  %rdx, (%rsi)        # *dest = val
        xorq    %rdx, %rax          # result ^= val
        addq    %r8, %rdi           # src++
        addq    %r8, %rsi           # dest++
        subq    %r10, %rcx          # len--
test:
        subq    %r9, %rcx           # len - 0: sets the condition codes, len unchanged
        jg      loop                # signed test: loop only while len > 0, so a
                                    # negative len copies nothing instead of running away
        ret
# Stack region: %rsp is initialised to this address; the destination buffer,
# saved register and return addresses all live in the words below it.
.pos 0x200
stack:
Running Result:
myarch% ./yas copy.ys
myarch% ./yis copy.yo
Stopped in 46 steps at PC = 0x13. Status 'HLT', CC Z=0 S=0 O=0
Changes to registers:
%rax: 0x0000000000000000 0x0000000000000cba
%rdx: 0x0000000000000000 0x0000000000000c00
%rsp: 0x0000000000000000 0x0000000000000200
%rsi: 0x0000000000000000 0x00000000000001d0
%rdi: 0x0000000000000000 0x0000000000000030
%r8: 0x0000000000000000 0x0000000000000040
%r10: 0x0000000000000000 0x0000000000000001
Changes to memory:
0x01a8: 0x0000000000000000 0x0000000000000067
0x01b0: 0x0000000000000000 0x0000000000000040
0x01b8: 0x0000000000000000 0x000000000000000a
0x01c0: 0x0000000000000000 0x00000000000000b0
0x01c8: 0x0000000000000000 0x0000000000000c00
0x01f8: 0x0000000000000000 0x0000000000000013
Part B
IADDQ设计:
fetch:
icode:ifun <-- M1[PC]
rA:rB <-- M1[PC+1]
valC <-- M8[PC+2]
valP <-- PC+10
decode:
valB <-- R[rB]
execute:
valE <-- valC + valB
Set CC
memory access:
write back:
R[rB] <-- valE
PC update:
PC <-- valP
Part C
这里更改了 pipe-full.hcl 文件，添加了 iaddq 指令，并且将 JXX 指令的分支预测策略改为了从不跳转。
##################################################################
# Do not modify this portion
# Function prologue.
# %rdi = src, %rsi = dst, %rdx = len
ncopy:
##################################################################
# You can modify this portion
# Copies len words from src to dst one word per iteration and counts in
# %rax how many of the copied values are greater than zero.
# Uses iaddq, so it needs the pipeline with the added iaddq instruction.
# The Done label is not part of this excerpt.
# Loop header
xorq %rax,%rax # count = 0;
andq %rdx,%rdx # len <= 0?
jle Done # if so, goto Done:
Loop:
mrmovq (%rdi), %r10 # read val from src...
rmmovq %r10, (%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos # if so, goto Npos:
iaddq $1, %rax # count++
Npos:
iaddq $-1, %rdx # len--
iaddq $8, %rdi # src++ (overwrites the condition codes set by len--)
iaddq $8, %rsi # dst++ (overwrites the condition codes again)
andq %rdx, %rdx # len > 0? re-test len, since the pointer updates changed the flags
jg Loop # if so, goto Loop:
##################################################################
Benchmark result:
Average CPE 2.01
Score 60.0/60.0
如果Npos标签内这么写
# Alternative loop tail: the pointer updates come first and len-- comes last,
# so the condition codes set by len-- are still valid at the jump and the
# separate andq on %rdx is no longer needed.
Npos:
iaddq $8, %rdi # src++
iaddq $8, %rsi # dst++
iaddq $-1, %rdx # len-- (sets the condition codes tested below)
jg Loop # if len > 0, goto Loop:
Average CPE 1.96
Score 60.0/60.0