【高阶】ARMv8/aarch64下TSC(Time Stamp Counter)读取方法

在x86架构中,我们对Time Stamp Counter (TSC) 寄存器非常熟悉,通过这个寄存器对代码执行时间的衡量可精确到CPU Cycle级别。

      但在ARM/ARMv8/aarch64架构中,并没有与x86 TSC对应的寄存器和直接对应的汇编指令rdtsc。

      若想在ARMv8架构中,统计代码执行时间并精确到CPU Cycle级别,也需要读取类似x86 TSC的寄存器。在ARMv8中,有Performance Monitors(PMU,性能监控单元)系列寄存器,其中PMCCNTR_EL0(Cycle计数寄存器)就类似于x86的TSC寄存器。本文介绍Linux下读取ARM“TSC”的方法。

      读取这个PMCCNTR_EL0寄存器值,就可以知道当前CPU已运行了多少Cycle。但在ARM下读取CPU Cycle和x86有所不同:

    1、x86用户态代码可以随意读取TSC值。但在ARM上,默认情况下用户态不可读取,需要先在内核态使能,用户态才能读取。

       开关由寄存器PMUSERENR_EL0控制(计数器本身的启停由PMCR_EL0控制)。实际上PMUSERENR_EL0控制整个PMU寄存器在用户态是否可读写,不仅仅是PMCCNTR_EL0。

      在内核态使能,可以是编写单独内核模块,也可以在内核代码任意被执行的位置加上设置使能PMU寄存器代码即可。Linux下使能(Enable)用户态访问PMU内核模块代码:

(内核模块源码如下。)

/*                                                                            

 * Enable user-mode ARM performance counter access.                           

 */                                                                           

#include <linux/kernel.h>                                                     

#include <linux/module.h>                                                     

#include <linux/smp.h>                                     

/* Option bit groups (not referenced by the code visible in this file). */
#define PERF_DEF_OPTS           (1 | 16)
#define PERF_OPT_RESET_CYCLES   (2 | 4)
#define PERF_OPT_DIV64          (8)

/*
 * Writable PMCR_EL0 bits: E, P, C, D, X, DP and LC (bits [6:0]).
 * armv8pmu_pmcr_write() ANDs every value with this mask, so it must cover
 * bit 6; with the former value 0x3f a request to set ARMV8_PMCR_LC was
 * silently dropped before reaching the register.
 */
#define ARMV8_PMCR_MASK         0x7f

/* PMCR_EL0 bit definitions. */
#define ARMV8_PMCR_E            (1 << 0) /* Enable all counters */
#define ARMV8_PMCR_P            (1 << 1) /* Reset all counters */
#define ARMV8_PMCR_C            (1 << 2) /* Cycle counter reset */
#define ARMV8_PMCR_D            (1 << 3) /* CCNT counts every 64th cpu cycle */
#define ARMV8_PMCR_X            (1 << 4) /* Export to ETM */
#define ARMV8_PMCR_DP           (1 << 5) /* Disable CCNT if non-invasive debug */
#define ARMV8_PMCR_LC           (1 << 6) /* Cycle Counter 64bit overflow */
#define ARMV8_PMCR_N_SHIFT      11       /* Number of counters supported */
#define ARMV8_PMCR_N_MASK       0x1f

/* PMUSERENR_EL0 bit definitions. */
#define ARMV8_PMUSERENR_EN_EL0  (1 << 0) /* EL0 access enable */
#define ARMV8_PMUSERENR_CR      (1 << 2) /* Cycle counter read enable */
#define ARMV8_PMUSERENR_ER      (1 << 3) /* Event counter read enable */

                                                                                                         

static inline u32 armv8pmu_pmcr_read(void)                                                             

{                                                                                                      

        u64 val=0;                                                                                     

        asm volatile("mrs %0, pmcr_el0" : "=r" (val));                                                 

        return (u32)val;                                                                               

}                                                                                                      

/*
 * Write PMCR_EL0.
 *
 * @val: requested register value; only the bits present in
 *       ARMV8_PMCR_MASK are written, everything else is cleared.
 *
 * An ISB is issued before the MSR.
 */
static inline void armv8pmu_pmcr_write(u32 val)
{
        u64 masked = val & ARMV8_PMCR_MASK;

        isb();
        asm volatile("msr pmcr_el0, %0" : : "r" (masked));
}

/*
 * Read the generic timer counter.
 *
 * NOTE: despite the "CNTPCT" in the name, the instruction below reads
 * CNTVCT_EL0, not CNTPCT_EL0.
 *
 * Returns the raw register value as a signed 64-bit integer.
 */
static inline long long armv8_read_CNTPCT_EL0(void)
{
        long long ticks;

        asm volatile("mrs %0, CNTVCT_EL0" : "=r" (ticks));

        return ticks;
}

              

static void                                                                                            

enable_cpu_counters(void* data)                                                                        

{                                                       

    u32 val=0;                                                        

    asm volatile("msr pmuserenr_el0, %0" : : "r"(0xf));

    armv8pmu_pmcr_write(ARMV8_PMCR_LC|ARMV8_PMCR_E);                                                     

        asm volatile("msr PMCNTENSET_EL0, %0" :: "r" ((u32)(1<<31)));

    armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMCR_E|ARMV8_PMCR_LC);  

        printk("\nCPU:%d ", smp_processor_id());

}                                                                                                      

static void                                                                                            

disable_cpu_counters(void* data)                                                                       

{                                                     

    u32 val=0;                                                                                            

    printk(KERN_INFO "\ndisabling user-mode PMU access on CPU #%d",                      

    smp_processor_id());                                      

    /* Program PMU and disable all counters */                                                            

        armv8pmu_pmcr_write(armv8pmu_pmcr_read() |~ARMV8_PMCR_E);                                             

    asm volatile("msr pmuserenr_el0, %0" : : "r"((u64)0));                                                      

}                                                           

static int __init                                                                                      

init(void)                                                                                             

{                                                                      

    u64 cval;

        u32 val;

        isb();

        asm volatile("mrs %0, PMCCNTR_EL0" : "=r"(cval));

        printk("\nCPU Cycle count:%llu \n", cval);

        asm volatile("mrs %0, PMCNTENSET_EL0" : "=r"(val));

        printk("PMCNTENSET_EL0:%lX ", val);

        asm volatile("mrs %0, PMCR_EL0" : "=r"(val));

        printk("\nPMCR_EL0 Register:%lX ", val);

        on_each_cpu(enable_cpu_counters, NULL, 1);                                                            

        printk(KERN_INFO "Enable Access PMU Initialized");                                                      

    return 0;                                                                                             

}                                                                                               

static void __exit                                                                                     

fini(void)                                                

{                                                       

    on_each_cpu(disable_cpu_counters, NULL, 1);                                                           

    printk(KERN_INFO "Access PMU Disabled");                                                         

}                                                                                                             module_init(init);                                                                                     

module_exit(fini);

2、x86下TSC的值,在CPU上电后就开始累加,且是只读寄存器。但在ARM中,只有使能PMCCNTR_EL0后,该计数器才开始累加计数,且PMCCNTR_EL0寄存器可清零,相当于一个可复位的计时器。

    用户态读取ARMv8 PMU寄存器代码:

(用户态程序源码如下。)

#include <stdio.h>

#include <sys/time.h>

#include <unistd.h>

/* PMCR_EL0 bit definitions. */

/* All counters, including PMCCNTR_EL0, are disabled/enabled */
#define QUADD_ARMV8_PMCR_E      (1 << 0)

/* Reset all event counters, not including PMCCNTR_EL0, to 0 */
#define QUADD_ARMV8_PMCR_P      (1 << 1)

/* Reset PMCCNTR_EL0 to 0 */
#define QUADD_ARMV8_PMCR_C      (1 << 2)

/* Clock divider: PMCCNTR_EL0 counts every clock cycle/every 64 clock cycles */
#define QUADD_ARMV8_PMCR_D      (1 << 3)

/* Export of events is disabled/enabled */
#define QUADD_ARMV8_PMCR_X      (1 << 4)

/* Disable cycle counter, PMCCNTR_EL0 when event counting is prohibited */
#define QUADD_ARMV8_PMCR_DP     (1 << 5)

/* Long cycle count enable */
#define QUADD_ARMV8_PMCR_LC     (1 << 6)

/*
 * Mask for writable bits: E, P, C, D, X, DP and LC (bits [6:0]).
 * It has to include bit 6, otherwise a masked write can never set
 * QUADD_ARMV8_PMCR_LC.
 */
#define ARMV8_PMCR_MASK     0x7f

/*
 * Read the Performance Monitors Control Register (PMCR_EL0).
 *
 * MRS transfers a full 64-bit X register, so the value is read into a
 * 64-bit temporary and then narrowed, instead of binding a 32-bit variable
 * to the instruction's operand.
 *
 * Returns the low 32 bits of PMCR_EL0.
 */
static inline unsigned int armv8_pmu_pmcr_read(void)
{
        unsigned long long pmcr;

        asm volatile("mrs %0, pmcr_el0" : "=r" (pmcr));

        return (unsigned int)pmcr;
}

static inline void armv8_pmu_pmcr_write(unsigned int val)

{

    asm volatile("msr pmcr_el0, %0" : :"r" (val & ARMV8_PMCR_MASK));

}

/*
 * Read the generic timer counter.
 *
 * NOTE: despite the "CNTPCT" in the name, the instruction below reads
 * CNTVCT_EL0, not CNTPCT_EL0.
 *
 * Returns the raw register value as a signed 64-bit integer.
 */
static inline long long armv8_read_CNTPCT_EL0(void)
{
    long long ticks;

    asm volatile("mrs %0, CNTVCT_EL0" : "=r" (ticks));

    return ticks;
}

/*
 * Set the E and X bits in PMCR_EL0 -- currently switched off.
 *
 * The function returns before touching the register, so calling it has no
 * effect. The register update is kept below the guard so it stays
 * type-checked and can be re-enabled by flipping the constant.
 */
static void enable_all_counters(void)
{
    enum { PROGRAM_PMCR = 0 };  /* 0: leave PMCR_EL0 untouched */
    unsigned int pmcr;

    if (!PROGRAM_PMCR)
        return;

    /* Enable all counters */
    pmcr = armv8_pmu_pmcr_read();
    pmcr |= QUADD_ARMV8_PMCR_E | QUADD_ARMV8_PMCR_X;
    armv8_pmu_pmcr_write(pmcr);
}

/*
 * Set the P and C (counter reset) bits in PMCR_EL0 -- currently switched
 * off.
 *
 * The function returns before touching the register, so calling it has no
 * effect. The register update is kept below the guard so it stays
 * type-checked and can be re-enabled by flipping the constant.
 */
static void reset_all_counters(void)
{
    enum { PROGRAM_PMCR = 0 };  /* 0: leave PMCR_EL0 untouched */
    unsigned int pmcr;

    if (!PROGRAM_PMCR)
        return;

    pmcr = armv8_pmu_pmcr_read();
    pmcr |= QUADD_ARMV8_PMCR_P | QUADD_ARMV8_PMCR_C;
    armv8_pmu_pmcr_write(pmcr);
}

static unsigned int enabled=0;

unsigned int readticks(unsigned int *result)

{

    struct timeval t;

    unsigned int cc;

    unsigned int val;

    if (!enabled) {

        reset_all_counters();

        enable_all_counters();

        enabled = 1;

    }

    cc = armv8_pmu_pmcr_read();

    gettimeofday(&t,(struct timezone *) 0);

    result[0] = cc;

    result[1] = t.tv_usec;

    result[2] = t.tv_sec;

   

    return cc;

}

/*
 * Read PMCR_EL0 and return its low 32 bits.
 *
 * MRS transfers a full 64-bit X register, so the value is read into a
 * 64-bit temporary and then narrowed, instead of binding a 32-bit variable
 * to the instruction's operand.
 */
static inline unsigned int armv8pmu_pmcr_read(void)
{
    unsigned long long pmcr;

    asm volatile("mrs %0, pmcr_el0" : "=r" (pmcr));

    return (unsigned int)pmcr;
}

/* Kernel-style fixed-width aliases, as real types rather than macros. */
typedef unsigned int u32;
typedef unsigned long long u64;

/* Issue an ISB instruction; the "memory" clobber also acts as a compiler barrier. */
#define isb()       asm volatile("isb" : : : "memory")

/*
 * Read the PMU cycle counter.
 *
 * NOTE: despite the "cntpct" in the name, the register read below is
 * PMCCNTR_EL0, not the generic timer's CNTPCT_EL0.
 *
 * An ISB is issued before the read. The operand is write-only ("=r"):
 * MRS only produces a value, and the previous "+r" constraint made the
 * compiler read the uninitialized local as an input.
 *
 * Returns the 64-bit value of PMCCNTR_EL0.
 */
static inline u64 arch_counter_get_cntpct(void)
{
    u64 cycles;

    isb();
    asm volatile("mrs %0, PMCCNTR_EL0" : "=r" (cycles));

    return cycles;
}

int main()

{

  unsigned int start,end;

  unsigned int result[3];

  unsigned long long timer;

  u32 pmcr_el;

  pmcr_el = armv8pmu_pmcr_read();

  printf("\nPMCR_EL0 Register:%lX ", pmcr_el);

  timer = arch_counter_get_cntpct();

  printf("\nCPU Cycle Count:0x%llX ",timer); 

  sleep(5);

  timer = arch_counter_get_cntpct();

  printf("\nCPU Cycle Count:0x%llX \n",timer);

  asm volatile("mrs %0, PMOVSCLR_EL0" : "=r"(pmcr_el));

  printf(" Register PMOVSCLR_EL0:0x%lX \n", pmcr_el);

   

  asm volatile("mrs %0, pmuserenr_el0" : "=r"(pmcr_el));

  printf(" Register pmuserenr_el0:0x%lX \n", pmcr_el);

  asm volatile("mrs %0, PMCNTENSET_EL0" : "=r"(pmcr_el));

  printf(" Register PMCNTENSET_EL0:0x%lX \n", pmcr_el);

  asm volatile("mrs %0, PMCCFILTR_EL0" : "=r"(pmcr_el));

  printf(" Register PMCCFILTR_EL0:0x%lX \n", pmcr_el);

  asm volatile("mrs %0, PMCNTENCLR_EL0" : "=r"(pmcr_el));

  printf(" Register PMCNTENCLR_EL0:0x%lX \n", pmcr_el);

  asm volatile("mrs %0, PMOVSSET_EL0" : "=r"(pmcr_el));

  printf(" Register PMOVSSET_EL0:0x%lX \n", pmcr_el);

  return 0;

}

The following table shows the PMCR_EL0 bit assignments for a System register access.

Table 11-4 PMCR_EL0 bit assignments

Bits | Name | Function
[31:24] | IMP

Implementer code:

0x41: ARM.

This is a read-only field.

[23:16] | IDCODE

Identification code:

0x01: Cortex-A57 processor.

This is a read-only field.

[15:11] | N

Number of event counters.

In Non-secure modes other than Hyp mode, this field reads the value of HDCR.HPMN. See 4.5.12 Hyp Debug Control Register.

In Secure state and Hyp mode, this field returns 0x6 that indicates the number of counters implemented.

This is a read-only field.

[10:7] | Reserved, RES0.
[6] | LC

Long cycle count enable. Selects which PMCCNTR_EL0 bit generates an overflow recorded in PMOVSR[31]:

0: Overflow on increment that changes PMCCNTR_EL0[31] from 1 to 0.
1: Overflow on increment that changes PMCCNTR_EL0[63] from 1 to 0.
[5] | DP

Disable cycle counter, PMCCNTR_EL0 when event counting is prohibited:

0: Cycle counter operates regardless of the non-invasive debug authentication settings.
1: Cycle counter is disabled if non-invasive debug is not permitted and enabled.

This bit is read/write.

[4] | X

Export enable. This bit permits events to be exported to another debug device, such as a trace macrocell, over an event bus:

0: Export of events is disabled.
1: Export of events is enabled.

This bit is read/write and does not affect the generation of Performance Monitors interrupts, that can be implemented as a signal exported from the processor to an interrupt controller.

[3] | D

Clock divider:

0: When enabled, PMCCNTR_EL0 counts every clock cycle.
1: When enabled, PMCCNTR_EL0 counts every 64 clock cycles.

This bit is read/write.

[2] | C

Clock counter reset:

0: No action.
1: Reset PMCCNTR_EL0 to 0.

Note

Resetting PMCCNTR does not clear the PMCCNTR_EL0 overflow bit to 0. See the ARM® Architecture Reference Manual ARMv8 for more information.

This bit is write-only, and always RAZ.

[1] | P

Event counter reset:

0: No action.
1: Reset all event counters, not including PMCCNTR_EL0, to 0.

In Non-secure modes other than Hyp mode, a write of 1 to this bit does not reset event counters that the HDCR.HPMN field reserves for Hyp mode use. See 4.5.12 Hyp Debug Control Register.

In Secure state and Hyp mode, a write of 1 to this bit resets all the event counters.

[0] | E

Enable bit. This bit does not disable or enable, counting by event counters reserved for Hyp mode by HDCR.HPMN. It also does not suppress the generation of performance monitor overflow interrupt requests by those counters:

0: All counters, including PMCCNTR_EL0, are disabled. This is the reset value.
1: All counters are enabled.

This bit is read/write.

  • 0
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值