CSAPP ArchitectureLab

4. ArchitectureLab

Part A

Function sum_list

Code:

# Execution begins at address 0
.pos 0
irmovq stack, %rsp              # Point %rsp at the `stack` label
call main                       # Run the test driver
halt                            # Stop once main returns

# Sample linked list: three elements, each a value quad followed by the
# address of the next element
.align 8
ele1:
        .quad   0x00a
        .quad   ele2
ele2:
        .quad   0x0b0
        .quad   ele3
ele3:
        .quad   0xc00
        .quad   0               # Next = 0 ends the list (sum_list stops on a 0 pointer)

# main: test driver. Calls sum_list on the sample list; the sum comes back in %rax.
main:
        irmovq  ele1, %rdi              # Argument: address of the first element
        call    sum_list
        ret

# sum_list: iteratively adds up the value field of every list element.
#   In:       %rdi = address of the first element (0 for an empty list)
#   Out:      %rax = sum of the values
#   Clobbers: %rcx, %rdx, %r8, %r9, %rdi, condition codes
sum_list:
        irmovq  $8, %r8                 # r8 = byte offset of the next field
        irmovq  $0, %rdx                # rdx = 0, compared against the pointer
        irmovq  $0, %rax                # sum = 0
        jmp     check                   # Test first so an empty list returns 0
walk:
        mrmovq  (%rdi), %r9             # r9 = ls->val
        addq    %r8, %rdi               # rdi = &ls->next
        mrmovq  (%rdi), %rdi            # ls = ls->next
        addq    %r9, %rax               # sum += val
check:
        rrmovq  %rdi, %rcx              # Copy so the subtraction leaves ls intact
        subq    %rdx, %rcx              # ZF is set when ls == 0
        jne     walk                    # Keep going while ls != 0
        ret                             # Return the sum in %rax

# Initial stack pointer; pushes and calls store below this address
.pos 0x200
stack:

Running Result:

myarch% ./yas sum.ys
myarch% ./yis sum.yo
Stopped in 35 steps at PC = 0x13.  Status 'HLT', CC Z=1 S=0 O=0
Changes to registers:
%rax:   0x0000000000000000      0x0000000000000cba
%rsp:   0x0000000000000000      0x0000000000000200
%r8:    0x0000000000000000      0x0000000000000008
%r9:    0x0000000000000000      0x0000000000000c00

Changes to memory:
0x01f0: 0x0000000000000000      0x000000000000005b
0x01f8: 0x0000000000000000      0x0000000000000013

Function rsum_list

Code:

# Execution begins at address 0
.pos 0
irmovq stack, %rsp              # Point %rsp at the `stack` label
call main                       # Run the test driver
halt                            # Stop once main returns

# Sample linked list: three elements, each a value quad followed by the
# address of the next element
.align 8
ele1:
        .quad   0x00a
        .quad   ele2
ele2:
        .quad   0x0b0
        .quad   ele3
ele3:
        .quad   0xc00
        .quad   0               # Next = 0 ends the list (rsum_list returns 0 for a 0 pointer)

# main: test driver. Calls rsum_list on the sample list; the sum comes back in %rax.
main:
        irmovq  ele1, %rdi              # Argument: address of the first element
        call    rsum_list
        ret

# rsum_list: recursive linked-list sum.
#   In:       %rdi = address of the first element (0 for an empty list)
#   Out:      %rax = sum of the value fields
#   Clobbers: %rcx, %rsi, %r8, %r9, %rdi, condition codes
#   Stack:    one saved value plus one return address per list element
rsum_list:
        irmovq  $8, %r8                 # r8 = byte offset of the next field
        irmovq  $0, %rsi                # rsi = 0 for the null check
        rrmovq  %rdi, %r9               # Test a copy so ls itself is preserved
        subq    %rsi, %r9               # ZF is set when ls == 0
        jne     recurse
        irmovq  $0, %rax                # Base case: an empty list sums to 0
        ret
recurse:
        mrmovq  (%rdi), %rax            # val = ls->val
        addq    %r8, %rdi               # rdi = &ls->next
        pushq   %rax                    # Keep val across the recursive call
        mrmovq  (%rdi), %rdi            # ls = ls->next
        call    rsum_list               # rax = rsum_list(ls->next)
        popq    %rcx                    # rcx = val saved above
        addq    %rcx, %rax              # Return val + rest
        ret


# Initial stack pointer; each recursion level stores its saved value and
# return address below this address
.pos 0x200
stack:

Running Result:

myarch% ./yas rsum.ys
myarch% ./yis rsum.yo
Stopped in 52 steps at PC = 0x13.  Status 'HLT', CC Z=0 S=0 O=0
Changes to registers:
%rax:   0x0000000000000000      0x0000000000000cba
%rcx:   0x0000000000000000      0x000000000000000a
%rsp:   0x0000000000000000      0x0000000000000200
%r8:    0x0000000000000000      0x0000000000000008

Changes to memory:
0x01c0: 0x0000000000000000      0x00000000000000a9
0x01c8: 0x0000000000000000      0x0000000000000c00
0x01d0: 0x0000000000000000      0x00000000000000a9
0x01d8: 0x0000000000000000      0x00000000000000b0
0x01e0: 0x0000000000000000      0x00000000000000a9
0x01e8: 0x0000000000000000      0x000000000000000a
0x01f0: 0x0000000000000000      0x000000000000005b
0x01f8: 0x0000000000000000      0x0000000000000013

Function copy_block

Code:

# Execution begins at address 0
.pos 0
irmovq stack, %rsp              # Point %rsp at the `stack` label
call main                       # Run the test driver
halt                            # Stop once main returns

# Source block: three consecutive quad words that main passes to copy_block
# (a flat array, not a linked list; there are no next pointers here)
.align 8
ele1:
        .quad   0x00a
ele2:
        .quad   0x0b0
ele3:
        .quad   0xc00

# main: test driver for copy_block.
# Reserves 0x40 bytes of stack as the destination buffer and copies the
# three-word source block at ele1 into it. Note that len is passed in %rcx.
main:
        irmovq  ele1, %rdi      # NOTE(review): redundant; %rdi is loaded again three lines below
        irmovq  $0x40, %r8
        subq    %r8, %rsp       # Reserve 0x40 bytes for the destination buffer
        irmovq  ele1, %rdi      # rdi: src
        rrmovq  %rsp, %rsi      # rsi: dest
        irmovq  $0x3, %rcx      # rcx: len
        pushq   %r8             # Save the buffer size; copy_block overwrites %r8
        call    copy_block
        popq    %r8             # Restore the buffer size
        addq    %r8, %rsp       # Release the buffer
        ret

# copy_block: copies len quad words from src to dest and returns the XOR
# of every word copied.
#   In:       %rdi = src, %rsi = dest, %rcx = len (number of 8-byte words)
#   Out:      %rax = XOR of the copied words (0 when len <= 0)
#   Clobbers: %rcx, %rdx, %rdi, %rsi, %r8, %r9, %r10, condition codes
copy_block:
        irmovq  $0x0, %rax              # result = 0
        irmovq  $0x8, %r8               # r8 = size of one word
        irmovq  $0x0, %r9               # r9 = 0, used to test len
        irmovq  $0x1, %r10              # r10 = 1, the loop decrement
        jmp     test
loop:
        mrmovq  (%rdi), %rdx            # val = *src
        rmmovq  %rdx, (%rsi)            # *dest = val
        xorq    %rdx, %rax              # result ^= val
        addq    %r8, %rdi               # src++
        addq    %r8, %rsi               # dest++
        subq    %r10, %rcx              # len--
test:
        subq    %r9, %rcx               # Set condition codes from len (len - 0)
        jg      loop                    # Loop only while len > 0: with jne a negative
                                        # len would never reach 0 and would keep copying
        ret


# Initial stack pointer; main's destination buffer is carved out below this address
.pos 0x200
stack:

Running Result:

myarch% ./yas copy.ys
myarch% ./yis copy.yo
Stopped in 46 steps at PC = 0x13.  Status 'HLT', CC Z=0 S=0 O=0
Changes to registers:
%rax:   0x0000000000000000      0x0000000000000cba
%rdx:   0x0000000000000000      0x0000000000000c00
%rsp:   0x0000000000000000      0x0000000000000200
%rsi:   0x0000000000000000      0x00000000000001d0
%rdi:   0x0000000000000000      0x0000000000000030
%r8:    0x0000000000000000      0x0000000000000040
%r10:   0x0000000000000000      0x0000000000000001

Changes to memory:
0x01a8: 0x0000000000000000      0x0000000000000067
0x01b0: 0x0000000000000000      0x0000000000000040
0x01b8: 0x0000000000000000      0x000000000000000a
0x01c0: 0x0000000000000000      0x00000000000000b0
0x01c8: 0x0000000000000000      0x0000000000000c00
0x01f8: 0x0000000000000000      0x0000000000000013

Part B

IADDQ设计:

fetch:
        icode:ifun      <--     M1[PC]
        rA:rB           <--     M1[PC+1]
        valC            <--     M8[PC+2]
        valP            <--     PC+10
decode:

        valB            <--     R[rB]
execute:
        valE            <--     valC + valB
        Set CC
memory access:

write back:
        R[rB]           <--     valE
PC update:
        PC              <--     valP

Part C

这里更改了pipe-full.hcl文件,添加了iaddq指令,并且将JXX指令的分支预测策略改为了从不跳转

##################################################################
# Do not modify this portion
# Function prologue.
# %rdi = src, %rsi = dst, %rdx = len
#
# ncopy: copies len words from src to dst and counts in %rax how many of
# the copied words are positive.
# NOTE: the `Done` label targeted below is not part of this excerpt.
ncopy:

##################################################################
# You can modify this portion
        # Loop header
        xorq %rax,%rax          # count = 0;
        andq %rdx,%rdx          # len <= 0?
        jle Done                # if so, goto Done:
Loop:
        mrmovq (%rdi), %r10     # read val from src...
        rmmovq %r10, (%rsi)     # ...and store it to dst
        andq %r10, %r10         # val <= 0?
        jle Npos                # if so, goto Npos:
        iaddq $1, %rax          # count++
Npos:
        iaddq $-1, %rdx         # len--
        iaddq $8, %rdi          # src++
        iaddq $8, %rsi          # dst++
        # iaddq sets the condition codes, so after the two pointer updates
        # they no longer describe len; re-test it before branching.
        andq %rdx, %rdx         # len > 0?
        jg Loop                 # if so, goto Loop:
##################################################################

benchmark result:

Average CPE     2.01
Score   60.0/60.0

如果Npos标签内这么写

# Variant of the loop tail: len-- is performed last, so the condition codes
# set by that iaddq feed jg directly and the separate andq test is not needed.
Npos:
        iaddq $8, %rdi          # src++
        iaddq $8, %rsi          # dst++
        iaddq $-1, %rdx         # len-- (also sets the condition codes for jg)
        jg Loop                 # if len > 0, goto Loop:
Average CPE     1.96
Score   60.0/60.0
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值