当前版本: 0.1
完成日期: 2007-6-7
作者: Dajie Tan <jiankemeng@gmail.com>
0. 概述
oprofile 在Linux 上分两部分,一个是内核模块(oprofile.ko),一个为用户空间的守护进程(oprofiled)。前者负责访问性能计数器或者注册基于时间采样的函数(使用register_timer_hook注册之,使时钟中断处理程序最后执行profile_tick 时可以访问之),并采样置于内核的缓冲区内。后者在后台运行,负责从内核空间收集数据,写入文件。二者都需要添加对龙芯2E 的支持。
1. kernel patch
完整之 patch 位于: http://people.openrays.org/~comcat/patch/patch-2.6.18-oprofile-comcat
主要工作是编写中断处理程序 godson2e_pmc_handler:当计数器溢出(第31位为1)时,会触发 IP6 中断,由该处理程序进行处理。
注意一下在用户空间设置的样本计数,在内核空间是怎么处理的。例如使用
opcontrol --setup --event=CYCLES:1000 设置样本计数为1000,
则计数器被初始化的值为 0x80000000 - 1000
即: CYCLES 事件发生 1000 次后,计数器的第31位变为1(计数溢出),触发处理程序 godson2e_pmc_handler,由其调用 oprofile_add_sample 添加一个样本。
diff -uNr src/arch/mips/oprofile/op_model_godson2e.c pmc/arch/mips/oprofile/op_model_godson2e.c
--- src/arch/mips/oprofile/op_model_godson2e.c 1970-01-01 08:00:00.000000000 +0800
+++ pmc/arch/mips/oprofile/op_model_godson2e.c 2007-05-25 14:59:54.000000000 +0800
@@ -0,0 +1,171 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2007 by comcat <jiankemeng@gmail.com>
+ */
+
+#include <linux/init.h>
+#include <linux/oprofile.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+
+#include "op_impl.h"
+
+/*
+ * Bit masks for the performance counter control word (the value handed to
+ * write_c0_pmc_control()).
+ * NOTE(review): the per-bit meanings are inferred from the macro names
+ * (counting at exception level / in kernel / supervisor / user mode, and
+ * overflow interrupt enable) -- confirm against the Godson-2E user manual.
+ */
+#define GODSON2E_PERFCTL_EXL (1UL << 0)
+#define GODSON2E_PERFCTL_KERNEL (1UL << 1)
+#define GODSON2E_PERFCTL_SUPERVISOR (1UL << 2)
+#define GODSON2E_PERFCTL_USER (1UL << 3)
+#define GODSON2E_PERFCTL_INTERRUPT_ENABLE (1UL << 4)
+/*
+ * Despite the PERFCTL_ prefix this mask is not applied to the control word:
+ * godson2e_pmc_handler() tests it against the 32-bit counter values, and a
+ * counter with bit 31 set is treated as overflowed.
+ */
+#define GODSON2E_PERFCTL_OVERFLOW (1ULL << 31)
+
+/*
+ * Event-select fields of the control word: the event number for counter 1
+ * is placed starting at bit 5, the one for counter 2 starting at bit 9.
+ * The argument is shifted as-is; it is not masked to the field width.
+ */
+#define GODSON2E_COUNTER1_EVENT(event) ((event) << 5)
+#define GODSON2E_COUNTER2_EVENT(event) ((event) << 9)
+
+
+/*
+ * IP6 --- performance counter overflow.
+ * On this platform IRQ numbers 56..63 stand for the CPU interrupt lines
+ * IP0..IP7, so IP6 is IRQ 56 + 6 = 62 (the interrupt dispatch code must
+ * call do_IRQ(62, ...) when IP6 is pending).
+ */
+static int godson2e_pmc_irq = 56 + 6;
+
+
+/*
+ * Register values computed once by godson2e_reg_setup() and later written
+ * to the hardware by godson2e_cpu_setup(), godson2e_cpu_start() and
+ * godson2e_pmc_handler().
+ */
+static struct godson2e_register_config {
+ unsigned int control; /* control word passed to write_c0_pmc_control() */
+ unsigned int reset_counter1; /* value counter 1 is (re)loaded with */
+ unsigned int reset_counter2; /* value counter 2 is (re)loaded with */
+ int c1_enabled; /* copy of ctr[0].enabled */
+ int c2_enabled; /* copy of ctr[1].enabled */
+} reg;
+
+/*
+ * Translate the oprofile counter configuration into register values.
+ *
+ * ctr[0] describes counter 1 and ctr[1] describes counter 2.  The results
+ * are only cached in 'reg'; nothing is written to the hardware here.
+ */
+static void godson2e_reg_setup(struct op_counter_config *ctr)
+{
+	struct op_counter_config *c1 = &ctr[0];
+	struct op_counter_config *c2 = &ctr[1];
+	unsigned int ctl = 0;
+
+	/*
+	 * Event selection.  An enabled counter gets the requested event and
+	 * switches on the overflow interrupt; a disabled counter is given a
+	 * fixed event instead (0xe for counter 1, 0x3 for counter 2).
+	 */
+	if (c1->enabled)
+		ctl |= GODSON2E_COUNTER1_EVENT(c1->event) |
+		       GODSON2E_PERFCTL_INTERRUPT_ENABLE;
+	else
+		ctl |= GODSON2E_COUNTER1_EVENT(0xe);
+
+	if (c2->enabled)
+		ctl |= GODSON2E_COUNTER2_EVENT(c2->event) |
+		       GODSON2E_PERFCTL_INTERRUPT_ENABLE;
+	else
+		ctl |= GODSON2E_COUNTER2_EVENT(0x3);
+
+	/*
+	 * There is a single set of mode bits in the control word, so each
+	 * one is set as soon as either counter asks for it.  'ctl' started
+	 * at zero and everything OR-ed in so far lives at bit 4 and above,
+	 * so a mode nobody asked for simply stays clear.
+	 */
+	if (c1->kernel || c2->kernel)
+		ctl |= GODSON2E_PERFCTL_KERNEL;
+	if (c1->user || c2->user)
+		ctl |= GODSON2E_PERFCTL_USER;
+	if (c1->exl || c2->exl)
+		ctl |= GODSON2E_PERFCTL_EXL;
+
+	reg.control = ctl;
+	reg.c1_enabled = c1->enabled;
+	reg.c2_enabled = c2->enabled;
+
+	/*
+	 * Preload value: starting from 0x80000000 - count, bit 31 becomes set
+	 * after 'count' more events.  A count of zero preloads zero.
+	 */
+	reg.reset_counter1 = c1->count ? 0x80000000 - c1->count : 0;
+	reg.reset_counter2 = c2->count ? 0x80000000 - c2->count : 0;
+}
+
+/*
+ * Load both reset values into the 64-bit count register: counter 2 goes
+ * into the upper 32 bits, counter 1 into the lower 32 bits.
+ * 'args' is not used.
+ */
+static void godson2e_cpu_setup(void *args)
+{
+	uint64_t initial = reg.reset_counter2;
+
+	initial <<= 32;
+	initial |= reg.reset_counter1;
+
+	write_c0_pmc_count(initial);
+}
+
+/*
+ * Start profiling: write the control word cached by godson2e_reg_setup()
+ * into the control register.  'args' is not used.
+ */
+static void godson2e_cpu_start(void *args)
+{
+ /* Start all counters */
+ write_c0_pmc_control(reg.control);
+}
+
+/*
+ * Stop profiling: write 0 to the control register.  The count register is
+ * left untouched.  'args' is not used.
+ */
+static void godson2e_cpu_stop(void *args)
+{
+ /* Stop all counters */
+ write_c0_pmc_control(0);
+}
+
+
+/*
+ * Interrupt handler installed on godson2e_pmc_irq by godson2e_init().
+ *
+ * With the control register zeroed, it reads the 64-bit count register,
+ * records one oprofile sample for every enabled counter whose bit 31 is
+ * set and replaces that counter's value with its reset value, then writes
+ * the counters back and restores the cached control word.
+ *
+ * 'irq' and 'dev_id' are not used; 'regs' is passed on to
+ * oprofile_add_sample().  Always returns IRQ_HANDLED.
+ */
+static irqreturn_t godson2e_pmc_handler(int irq, void * dev_id,
+ struct pt_regs *regs)
+{
+ uint32_t counter1, counter2;
+ uint64_t counters;
+ uint64_t tmp = 0x0;
+
+ /*
+ * Godson2e combines two 32-bit performance counters into a single
+ * 64-bit coprocessor zero register. To avoid a race updating the
+ * registers we need to stop the counters while we're messing with
+ * them ...
+ */
+
+ write_c0_pmc_control(tmp);
+
+ /* Low 32 bits are counter 1, high 32 bits are counter 2. */
+ counters = read_c0_pmc_count();
+ counter1 = counters;
+ counter2 = counters >> 32;
+
+ /*
+ * '&' binds tighter than '&&', so this reads as
+ * enabled && (counter & OVERFLOW).  The sample for counter 2 is
+ * recorded with index 1.
+ */
+ if (reg.c2_enabled && counter2 & GODSON2E_PERFCTL_OVERFLOW) {
+ oprofile_add_sample(regs, 1);
+ counter2 = reg.reset_counter2;
+ }
+
+ /* Same check for counter 1, recorded with index 0. */
+ if (reg.c1_enabled && counter1 & GODSON2E_PERFCTL_OVERFLOW) {
+ oprofile_add_sample(regs, 0);
+ counter1 = reg.reset_counter1;
+ }
+
+ /* A counter that did not overflow is written back with the value read. */
+ counters = ((uint64_t)counter2 << 32) | counter1;
+
+ write_c0_pmc_count(counters);
+ /* Restore the cached control word (the counters were stopped above). */
+ write_c0_pmc_control(reg.control);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Bring the counters into a known state (control word and count register
+ * both zero) and install godson2e_pmc_handler() on the overflow IRQ.
+ *
+ * Returns the result of request_irq().
+ */
+static int __init godson2e_init(void)
+{
+	uint64_t zero_count = 0;
+	int ret;
+
+	write_c0_pmc_control(0);
+	write_c0_pmc_count(zero_count);
+
+	ret = request_irq(godson2e_pmc_irq, godson2e_pmc_handler,
+			  IRQF_DISABLED, "Perfcounter", NULL);
+	return ret;
+}
+
+/*
+ * Undo godson2e_init(): zero the control word and the count register,
+ * then release the overflow IRQ (registered with a NULL dev_id).
+ */
+static void godson2e_exit(void)
+{
+	uint64_t zero_count = 0;
+
+	write_c0_pmc_control(0);
+	write_c0_pmc_count(zero_count);
+
+	free_irq(godson2e_pmc_irq, NULL);
+}
+
+/*
+ * Godson-2E hooks handed to the MIPS oprofile code.  The cpu_type string
+ * has to match the events directory used by the user-space tools
+ * (events/mips/godson2e).  Two counters are exposed.
+ */
+struct op_mips_model op_model_godson2e_ops = {
+	.cpu_type	= "mips/godson2e",
+	.num_counters	= 2,
+	.init		= godson2e_init,
+	.exit		= godson2e_exit,
+	.reg_setup	= godson2e_reg_setup,
+	.cpu_setup	= godson2e_cpu_setup,
+	.cpu_start	= godson2e_cpu_start,
+	.cpu_stop	= godson2e_cpu_stop,
+};
+
+
IRQ 56~63 共8个号分配给 MIPS_CPU 内部中断使用,对应于 CAUSE 寄存器的 IP0~IP7。实际用到的只有63号,对应于时钟中断;注意 MIPS 下时钟中断的产生方式,它是由 CPU 内部产生的(参见用户手册对 Count 与 Compare 寄存器的描述)。56、57、59、60保留未用;58、61 对应 IP2 与 IP5,分别连接到北桥内的中断控制器和 I8259A,中断由这两个控制器进一步分发,因而这两个号本身实际并未使用,但是为了防止其他驱动 request_irq 时申请之,故而使用 cascade 填充之(可以通过 cat /proc/interrupts 查看)。
因为上面使用了 62 作为计数器溢出的中断请求号,因此当内核检测到 cause 之IP6 为1时,应该 do_IRQ(62, regs):
diff -uNr src/arch/mips/godson/lm2e/irq.c pmc/arch/mips/godson/lm2e/irq.c
--- src/arch/mips/godson/lm2e/irq.c 2006-12-01 18:10:11.000000000 +0800
+++ pmc/arch/mips/godson/lm2e/irq.c 2007-04-29 10:30:24.000000000 +0800
@@ -113,6 +113,8 @@
if (pending & CAUSEF_IP7) {
do_IRQ(63, regs);
+ } else if (pending & CAUSEF_IP6) {
+ do_IRQ(62, regs);
} else if (pending & CAUSEF_IP5) {
i8259_irqdispatch(regs);
} else if (pending & CAUSEF_IP2) {
添加在32位核心模式下,访问64位寄存器的支持。此中缘由参见
《32位模式下使用64位寄存器注意事项》的第3部分。
diff -uNr src/include/asm-mips/mipsregs.h pmc/include/asm-mips/mipsregs.h
--- src/include/asm-mips/mipsregs.h 2006-09-20 11:42:06.000000000 +0800
+++ pmc/include/asm-mips/mipsregs.h 2007-05-22 16:01:48.000000000 +0800
@@ -650,6 +650,57 @@
/*
+ * Macros to access the godson2e system control coprocessor
+ */
+#define __read_64bit_c0_pmc_split(source, sel) \
+({ \
+ unsigned long long val; \
+ unsigned long flags; \
+ \
+ local_irq_save(flags); \
+ if (sel == 0) \
+ __asm__ __volatile__( \
+ ".set\tmips64\n\t" \
+ "dmfc0\t%M0, " #source "\n\t" \
+ "dsll\t%L0, %M0, 32\n\t" \
+ "dsra\t%M0, %M0, 32\n\t" \
+ "dsra\t%L0, %L0, 32\n\t" \
+ ".set\tmips0" \
+ : "=r" (val)); \
+ else \
+ ...
+
+
+/*
+ * Godson2e PMC counter register: coprocessor 0 register $25, select 0.
+ * It is accessed as one 64-bit value; the oprofile driver treats the low
+ * 32 bits as counter 1 and the high 32 bits as counter 2.
+ */
+#define read_c0_pmc_count() __read_64bit_c0_pmc($25, 0)
+#define write_c0_pmc_count(val) __write_64bit_c0_register($25, 0, val)
+
这里添加一个判断,用于处理如下情形:模块已手动加载或者 oprofile 已直接编入内核,但用户没有使用 opcontrol --init 初始化 oprofile,而是手动设置 CP0_24(中断使能位为1)来使用性能计数器。由于用户没有使用 opcontrol 通知内核空间的 oprofile 完成一些初始化的工作,一段时间后计数器溢出,触发处理程序 godson2e_pmc_handler;在其中调用 oprofile_add_sample 时,由于缓冲区没有初始化,下面的 entry 会为 NULL,没有判断就会出现非法内存访问,表现为内核输出 Oops 消息后即死机。(注意:entry 是 &cpu_buf->buffer[cpu_buf->head_pos],只有在 buffer 为 NULL 且 head_pos 为 0 时它才等于 NULL。)
diff -uNr src/drivers/oprofile/cpu_buffer.c pmc/drivers/oprofile/cpu_buffer.c
--- src/drivers/oprofile/cpu_buffer.c 2006-09-20 11:42:06.000000000 +0800
+++ pmc/drivers/oprofile/cpu_buffer.c 2007-04-30 15:39:23.000000000 +0800
@@ -148,6 +148,10 @@
unsigned long pc, unsigned long event)
{
struct op_sample * entry = &cpu_buf->buffer[cpu_buf->head_pos];
+
+ if(!entry)
+ return;
+
entry->eip = pc;
entry->event = event;
increment_head(cpu_buf);
2. 用户空间工具集 patch
用户空间工具与具体的体系结构关系并不密切,因此移植起来要容易很多。
用户空间工具用途主要在于CPU计数器事件的设置、取样数据的保存、数据的分析等。与具体体系结构相关的就是CPU计数事件的设置。因为不同的CPU,其计数事件的定义是不一样的。
Oprofile 用户空间工具安装后,在 /usr/share/oprofile/ 目录下保存有它所支持的所有体系结构CPU的计数事件文件。当其需要设置计数事件时,首先读取 /dev/oprofile/cpu_type 中的字符串,比如 pentium IV 为 "i386/p4",则 oprofile 就以 /usr/share/oprofile/i386/p4/ 下的 events 文件作为当前平台所用之计数事件文件。
因此对龙芯2E 的支持,需要在 /usr/share/oprofile/ 下加入龙芯2E 计数事件文件,具体的位于哪个目录,则要与上面内核空间导出的 /dev/oprofile/cpu_type值相一致。
因为上面在 op_model_godson2e.c 的结构体op_model_godson2e_ops 里已经添加龙芯2E 的cpu_type 字符串为: mips/godson2e,则须在源码目录的 events/mips/ 下添加 godson2e/events 和 godson2e/unit_masks。
下面是最主要的 events 文件,具体事件描述参见2E用户手册:
diff -Nru oprofile-0.9.2/events/mips/godson2e/events oprofile-0.9.2-godson/events/mips/godson2e/events
--- oprofile-0.9.2/events/mips/godson2e/events 1970-01-01 08:00:00.000000000 +0800
+++ oprofile-0.9.2-godson/events/mips/godson2e/events 2007-04-23 16:17:10.000000000 +0800
@@ -0,0 +1,38 @@
+#
+# GODSON2E events
+#
+# The same event numbers mean different things on the two counters
+#
+event:0x00 counters:0 um:zero minimum:500 name:CYCLES : Cycles
+event:0x01 counters:0 um:zero minimum:500 name:BRANCH_NUMBER : Branch instruction number
+event:0x02 counters:0 um:zero minimum:500 name:JR_NUMBER : JR instruction number
+event:0x03 counters:0 um:zero minimum:500 name:JR31_NUMBER : JR 31 number
+event:0x04 counters:0 um:zero minimum:500 name:ICACHE_MISSES : Instruction Cache misses number
+event:0x05 counters:0 um:zero minimum:500 name:ALU1_ISSUED : ALU1 unit instruction count
+event:0x06 counters:0 um:zero minimum:500 name:MEM_ISSUED : MEM unit instruction count
+event:0x07 counters:0 um:zero minimum:500 name:FALU1_ISSUED : FALU1 unit instruction count
+event:0x08 counters:0 um:zero minimum:500 name:BHT : BHT instruction count
+event:0x09 counters:0 um:zero minimum:500 name:MEM_READ : memory read count
+event:0x0a counters:0 um:zero minimum:500 name:FIX_QUEUE_FULL : Fix queue full count
+event:0x0b counters:0 um:zero minimum:500 name:REORDER_QUEUE_FULL : Reorder queue full count
+event:0x0c counters:0 um:zero minimum:500 name:CP0_QUEUE_FULL : CP0 queue full count
+event:0x0d counters:0 um:zero minimum:500 name:TLB_REFILL : TLB refill count
+event:0x0e counters:0 um:zero minimum:500 name:INTERRUPT : Interrupt count
+event:0x0f counters:0 um:zero minimum:500 name:INTERNAL_EXCEPTION : Internal exception count
+
+event:0x00 counters:1 um:zero minimum:500 name:COMMITTED_ISSUED : Committed instruction count
+event:0x01 counters:1 um:zero minimum:500 name:BRANCHES_MISPREDICTED : Branches mispredicted count
+event:0x02 counters:1 um:zero minimum:500 name:JR_MISPREDICTED : JR mispredicted count
+event:0x03 counters:1 um:zero minimum:500 name:JR31_MISPREDICTED : JR 31 mispredicted count
+event:0x04 counters:1 um:zero minimum:500 name:DCACHE_MISSES : Data cache misses count
+event:0x05 counters:1 um:zero minimum:500 name:ALU2_ISSUED : ALU2 unit instructions count
+event:0x06 counters:1 um:zero minimum:500 name:FALU2_ISSUED : FALU2 unit instruction count
+event:0x07 counters:1 um:zero minimum:500 name:UNCACHE : Uncache operation count
+event:0x08 counters:1 um:zero minimum:500 name:BHT_MISSES : BHT misses
+event:0x09 counters:1 um:zero minimum:500 name:STORE_ISSUED : Store operation count
+event:0x0a counters:1 um:zero minimum:500 name:FLOAT_POINTING_QUEUE_FULL : Floating-point queue full count
+event:0x0b counters:1 um:zero minimum:500 name:BRANCH_QUEUE_FULL : Branch queue full count
+event:0x0c counters:1 um:zero minimum:500 name:ITLB_MISSES : Instruction tlb misses count
+event:0x0d counters:1 um:zero minimum:500 name:EXCEPTION : Total exception count
+event:0x0e counters:1 um:zero minimum:500 name:LOAD_SPECULATION : Load speculation count
+event:0x0f counters:1 um:zero minimum:500 name:CP0_FORWARD : CP0 forward count
其他细微处的修改,参见具体的patch 文件: http://people.openrays.org/~comcat/patch/oprofile-0.9.2-godson.patch