内核中的 Data Watchpoint and Trace unit (DWT) 可以实现指令数的测量,具体实现过程如下。
内核中有DWT_CYCCNT,DWT_CPICNT,DWT_EXCCNT,DWT_SLEEPCNT,DWT_LSUCNT,DWT_FOLDCNT 六个寄存器。
除了DWT_CYCCNT以外,其它寄存器都是8位的,若溢出则会导致测量结果不准确,所以只适用于少量代码指令数的测量。
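指令数 (instruction count, ICNT) 可以通过如下公式计算得出:
$\mathrm{ICNT} = \mathrm{CYCCNT} + \mathrm{FOLDCNT} - \mathrm{LSUCNT} - \mathrm{EXCCNT} - \mathrm{SLEEPCNT} - \mathrm{CPICNT}$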
具体代码实现如下,主要包括测量方法和需要配置的内核寄存器。
/*
 * Memory-mapped register accessors.
 *
 * addr is converted to a pointer to volatile u32, so the compiler may not
 * cache or drop the access. Both the argument and the whole expansion are
 * parenthesized: without that, REG_READ(base + 4) would cast `base` first
 * and then add 4 *elements* to the pointer, hitting the wrong register.
 *
 * REG_READ(addr)       - yields the 32-bit value stored at addr.
 * REG_WRITE(addr, val) - stores val at addr.
 */
#define REG_READ(addr) (*((volatile u32 *)(addr)))
#define REG_WRITE(addr, val) (*((volatile u32 *)(addr)) = (val))
/* Cortex-M7 debug system registers */

/* Debug Exception and Monitor Control Register and its trace-enable bit. */
#define DEMCR 0xE000EDFC
#define DEMCR_TRCENA (1UL << 24)

/*
 * DWT control and counter register addresses used by the PMU helpers.
 * Guarded so a register map already provided by the project takes precedence.
 */
#ifndef DWT_CTRL
#define DWT_CTRL 0xE0001000
#define DWT_CYCCNT 0xE0001004
#define DWT_CPICNT 0xE0001008
#define DWT_EXCCNT 0xE000100C
#define DWT_SLEEPCNT 0xE0001010
#define DWT_LSUCNT 0xE0001014
#define DWT_FOLDCNT 0xE0001018
#endif

/* DWT_CTRL enable bits: cycle counter plus the five event counters. */
#define DWT_CTRL_CYCCNTENA (1UL << 0)
#define DWT_CTRL_CPIEVTENA (1UL << 17)
#define DWT_CTRL_EXCEVTENA (1UL << 18)
#define DWT_CTRL_SLEEPEVTENA (1UL << 19)
#define DWT_CTRL_LSUEVTENA (1UL << 20)
#define DWT_CTRL_FOLDEVTENA (1UL << 21)
/*
 * Sample the DWT counters and combine them into an instruction count:
 *
 *   CYCCNT + FOLDCNT - LSUCNT - EXCCNT - SLEEPCNT - CPICNT
 *
 * Despite the macro name, the result is an instruction count rather than a
 * raw cycle count. The five event counters are 8 bits wide, so only their
 * low byte is used. The whole expansion is parenthesized so the macro can be
 * embedded in a larger expression without precedence surprises.
 */
#define PMU_READ_CYCLE() (REG_READ(DWT_CYCCNT) + \
	(REG_READ(DWT_FOLDCNT) & 0xFFu) - \
	(REG_READ(DWT_LSUCNT) & 0xFFu) - \
	(REG_READ(DWT_EXCCNT) & 0xFFu) - \
	(REG_READ(DWT_SLEEPCNT) & 0xFFu) - \
	(REG_READ(DWT_CPICNT) & 0xFFu))
/* Bookkeeping for one measurement, updated by pmu_event_read(). */
struct perf_event {
	u32 count;      /* difference between the two most recent samples */
	u32 prev_count; /* sample stored by the most recent read */
};
/*
 * Set the TRCENA bit in DEMCR.
 * Read-modify-write, so every other DEMCR bit keeps its current value.
 */
void pmu_enable(void)
{
	REG_WRITE(DEMCR, REG_READ(DEMCR) | DEMCR_TRCENA);
}
/*
 * Start counting: set the cycle-counter enable bit and the five event-counter
 * enable bits (CPI, EXC, SLEEP, LSU, FOLD) in DWT_CTRL.
 * All other DWT_CTRL bits are left as they were.
 *
 * Declared with (void) so the definition is a real prototype, matching
 * pmu_enable().
 */
void pmu_event_start(void)
{
	u32 val;

	val = REG_READ(DWT_CTRL);
	val |= DWT_CTRL_CYCCNTENA |
	       DWT_CTRL_CPIEVTENA |
	       DWT_CTRL_EXCEVTENA |
	       DWT_CTRL_SLEEPEVTENA |
	       DWT_CTRL_LSUEVTENA |
	       DWT_CTRL_FOLDEVTENA;
	REG_WRITE(DWT_CTRL, val);
}
/*
 * Stop counting: clear the cycle-counter enable bit and the five
 * event-counter enable bits (CPI, EXC, SLEEP, LSU, FOLD) in DWT_CTRL.
 * All other DWT_CTRL bits are left as they were.
 *
 * Declared with (void) so the definition is a real prototype, matching
 * pmu_enable().
 */
void pmu_event_stop(void)
{
	u32 val;

	val = REG_READ(DWT_CTRL);
	val &= ~(DWT_CTRL_CYCCNTENA |
		 DWT_CTRL_CPIEVTENA |
		 DWT_CTRL_EXCEVTENA |
		 DWT_CTRL_SLEEPEVTENA |
		 DWT_CTRL_LSUEVTENA |
		 DWT_CTRL_FOLDEVTENA);
	REG_WRITE(DWT_CTRL, val);
}
/*
 * Take a new counter sample and update *event:
 *   event->count      = new sample minus the previously stored sample
 *   event->prev_count = new sample
 * Calling it once before and once after a code region leaves the region's
 * count in event->count.
 */
void pmu_event_read(struct perf_event *event)
{
	const u32 sample = PMU_READ_CYCLE();
	const u32 previous = event->prev_count;

	event->prev_count = sample;
	event->count = sample - previous;
}
main()
{
....
struct perf_event perf_evts;
pmu_event_start();
pmu_event_read(&perf_evts);
function(); /*需要测试指令数的代码 */
pmu_event_read(&perf_evts);
pmu_event_stop();
printf(">>>%d is ICNT <<<.\n", perf_evts.count);
...
}