oprofile for godson2e 移植手记


当前版本: 0.1
完成日期: 2007-6-7

作者: Dajie Tan <jiankemeng@gmail.com>



0. 概述

oprofile 在Linux 上分两部分,一个是内核模块(oprofile.ko),一个为用户空间的守护进程(oprofiled)。前者负责访问性能计数器或者注册基于时间采样的函数(使用register_timer_hook注册之,使时钟中断处理程序最后执行profile_tick 时可以访问之),并采样置于内核的缓冲区内。后者在后台运行,负责从内核空间收集数据,写入文件。二者都需要添加对龙芯2E 的支持。


1. kernel patch

完整之 patch 位于: http://people.openrays.org/~comcat/patch/patch-2.6.18-oprofile-comcat

这部分的主要工作是编写中断处理程序 godson2e_pmc_handler:当计数器溢出(第31位为1)时会触发 IP6 中断,由该处理程序负责响应。

注意一下在用户空间设置的样本计数,在内核空间是怎么处理的。例如使用

opcontrol --setup --event=CYCLES:1000 设置样本计数为1000,

则计数器被初始化为 0x80000000 - 1000,
即: CYCLES 事件发生 1000 次后,计数器第31位变为1(溢出),触发处理程序 godson2e_pmc_handler 调用 oprofile_add_sample 添加一个样本。

diff -uNr src/arch/mips/oprofile/op_model_godson2e.c pmc/arch/mips/oprofile/op_model_godson2e.c
--- src/arch/mips/oprofile/op_model_godson2e.c     1970-01-01 08:00:00.000000000 +0800
+++ pmc/arch/mips/oprofile/op_model_godson2e.c     2007-05-25 14:59:54.000000000 +0800
@@ -0,0 +1,171 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2007 by comcat <jiankemeng@gmail.com>
+ */
+
+#include <linux/init.h>
+#include <linux/oprofile.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+
+#include "op_impl.h"
+
+#define GODSON2E_PERFCTL_EXL               (1UL   << 0)
+#define GODSON2E_PERFCTL_KERNEL               (1UL   << 1)
+#define GODSON2E_PERFCTL_SUPERVISOR         (1UL   << 2)
+#define GODSON2E_PERFCTL_USER               (1UL   << 3)
+#define GODSON2E_PERFCTL_INTERRUPT_ENABLE     (1UL   << 4)
+#define GODSON2E_PERFCTL_OVERFLOW         (1ULL     << 31)
+
+#define GODSON2E_COUNTER1_EVENT(event)     ((event) << 5)
+#define GODSON2E_COUNTER2_EVENT(event)     ((event) << 9)
+
+
+/* IP6 --- performance counter overflow */
+static int godson2e_pmc_irq = 56 + 6;
+
+
+static struct godson2e_register_config {
+     unsigned int control;
+     unsigned int reset_counter1;
+     unsigned int reset_counter2;
+     int c1_enabled;
+     int c2_enabled;
+} reg;
+
+/* Compute all of the registers in preparation for enabling profiling. */
+
+static void godson2e_reg_setup(struct op_counter_config *ctr)
+{
+     unsigned int control = 0;
+
+     /* Compute the performance counter control word. */
+     /* For now count kernel and user mode */
+     if (ctr[0].enabled)
+         control |= GODSON2E_COUNTER1_EVENT(ctr[0].event) |
+                 GODSON2E_PERFCTL_INTERRUPT_ENABLE;
+     else
+         control |= GODSON2E_COUNTER1_EVENT(0xe);
+
+     if (ctr[1].enabled)
+         control |= GODSON2E_COUNTER2_EVENT(ctr[1].event) |
+                 GODSON2E_PERFCTL_INTERRUPT_ENABLE;
+     else
+         control |= GODSON2E_COUNTER2_EVENT(0x3);
+
+     if (ctr[0].kernel || ctr[1].kernel)
+         control |= GODSON2E_PERFCTL_KERNEL;
+     else
+         control &= ~GODSON2E_PERFCTL_KERNEL;
+
+     if (ctr[0].user || ctr[1].user)
+         control |= GODSON2E_PERFCTL_USER;
+     else
+         control &= ~GODSON2E_PERFCTL_USER;
+
+     if (ctr[0].exl || ctr[1].exl)
+         control |= GODSON2E_PERFCTL_EXL;
+     else
+         control &= ~GODSON2E_PERFCTL_EXL;
+
+     reg.control = control;
+
+     reg.c1_enabled = ctr[0].enabled;
+     reg.c2_enabled = ctr[1].enabled;
+     reg.reset_counter1 = ctr[0].count ? 0x80000000 - ctr[0].count : 0;
+     reg.reset_counter2 = ctr[1].count ? 0x80000000 - ctr[1].count : 0;
+}
+
+static void godson2e_cpu_setup (void *args)
+{
+     uint64_t perfcount;
+
+     perfcount = ((uint64_t) reg.reset_counter2 << 32) | reg.reset_counter1;
+     write_c0_pmc_count(perfcount);
+}
+
+static void godson2e_cpu_start(void *args)
+{
+     /* Start all counters */
+     write_c0_pmc_control(reg.control);
+}
+
+static void godson2e_cpu_stop(void *args)
+{
+     /* Stop all counters */
+     write_c0_pmc_control(0);
+}
+
+
+static irqreturn_t godson2e_pmc_handler(int irq, void * dev_id,
+     struct pt_regs *regs)
+{
+     uint32_t counter1, counter2;
+     uint64_t counters;
+     uint64_t tmp = 0x0;
+
+     /*
+     * Godson2e combines two 32-bit performance counters into a single
+     * 64-bit coprocessor zero register. To avoid a race updating the
+     * registers we need to stop the counters while we're messing with
+     * them ...
+     */
+
+     write_c0_pmc_control(tmp);
+
+     counters = read_c0_pmc_count();
+     counter1 = counters;
+     counter2 = counters >> 32;
+
+     if (reg.c2_enabled && counter2 & GODSON2E_PERFCTL_OVERFLOW) {
+         oprofile_add_sample(regs, 1);
+         counter2 = reg.reset_counter2;
+     }
+
+     if (reg.c1_enabled && counter1 & GODSON2E_PERFCTL_OVERFLOW) {
+         oprofile_add_sample(regs, 0);
+         counter1 = reg.reset_counter1;
+     }
+
+     counters = ((uint64_t)counter2 << 32) | counter1;
+
+     write_c0_pmc_count(counters);
+     write_c0_pmc_control(reg.control);
+
+     return IRQ_HANDLED;
+}
+
+static int __init godson2e_init(void)
+{
+     uint64_t tmp = 0;
+     write_c0_pmc_control(0);
+     write_c0_pmc_count(tmp);
+
+     return request_irq(godson2e_pmc_irq, godson2e_pmc_handler,
+                 IRQF_DISABLED, "Perfcounter", NULL);
+}
+
+static void godson2e_exit(void)
+{
+     uint64_t tmp = 0;
+     write_c0_pmc_control(0);
+     write_c0_pmc_count(tmp);
+
+     free_irq(godson2e_pmc_irq, NULL);
+}
+
+struct op_mips_model op_model_godson2e_ops = {
+     .reg_setup     = godson2e_reg_setup,
+     .cpu_setup     = godson2e_cpu_setup,
+     .init         = godson2e_init,
+     .exit         = godson2e_exit,
+     .cpu_start     = godson2e_cpu_start,
+     .cpu_stop     = godson2e_cpu_stop,
+     .cpu_type     = "mips/godson2e",
+     .num_counters     = 2
+};
+
+

IRQ 56~63 共8个号分配给 MIPS_CPU 内部中断使用,依次对应于 CAUSE 寄存器的 IP0~IP7。实际用到的只有63号,对应于时钟中断;注意 MIPS 下时钟中断是由 CPU 内部产生的(参见用户手册对 Count 与 Compare 寄存器的描述)。56、57、59、60保留未用;58、61 对应 IP2 与 IP5,分别连接到北桥内的中断控制器和 I8259A,它们由专门的分发函数处理(如下面 irq.c 中 IP5 对应 i8259_irqdispatch),因而这两个中断号实际并未使用,但是为了防止其他驱动 request_irq 时申请之,故而使用 cascade 填充之(可以通过 cat /proc/interrupts 查看)。

因为上面使用了 62 作为计数器溢出的中断请求号,因此当内核检测到 CAUSE 寄存器的 IP6 为1时,应该调用 do_IRQ(62, regs):

diff -uNr src/arch/mips/godson/lm2e/irq.c pmc/arch/mips/godson/lm2e/irq.c
--- src/arch/mips/godson/lm2e/irq.c     2006-12-01 18:10:11.000000000 +0800
+++ pmc/arch/mips/godson/lm2e/irq.c     2007-04-29 10:30:24.000000000 +0800
@@ -113,6 +113,8 @@

    if (pending & CAUSEF_IP7) {
          do_IRQ(63, regs);
+     } else if (pending & CAUSEF_IP6) {
+         do_IRQ(62, regs);
    } else if (pending & CAUSEF_IP5) {
          i8259_irqdispatch(regs);
    } else if (pending & CAUSEF_IP2) {


添加在32位核心模式下,访问64位寄存器的支持。此中缘由参见
《32位模式下使用64位寄存器注意事项》的第3部分。

diff -uNr src/include/asm-mips/mipsregs.h pmc/include/asm-mips/mipsregs.h
--- src/include/asm-mips/mipsregs.h     2006-09-20 11:42:06.000000000 +0800
+++ pmc/include/asm-mips/mipsregs.h     2007-05-22 16:01:48.000000000 +0800
@@ -650,6 +650,57 @@

/*
+ * Macros to access the godson2e system control coprocessor
+ */
+#define __read_64bit_c0_pmc_split(source, sel)                   \
+({                                           \
+     unsigned long long val;                             \
+     unsigned long flags;                             \
+                                           \
+     local_irq_save(flags);                             \
+     if (sel == 0)                                 \
+         __asm__ __volatile__(                       \
+               ".set\tmips64\n\t"                   \
+               "dmfc0\t%M0, " #source "\n\t"               \
+               "dsll\t%L0, %M0, 32\n\t"               \
+               "dsra\t%M0, %M0, 32\n\t"               \
+               "dsra\t%L0, %L0, 32\n\t"               \
+               ".set\tmips0"                       \
+               : "=r" (val));                       \
+     else                                     \
+     ...
+

+
+/* Godson2e PMC counter register */
+#define read_c0_pmc_count()     __read_64bit_c0_pmc($25, 0)
+#define write_c0_pmc_count(val)     __write_64bit_c0_register($25, 0, val)
+


这里添加一个判断,用于处理如下情形:oprofile 模块已手动加载或者直接编入内核,但用户没有使用 opcontrol --init 初始化 oprofile,而是手动设置 CP0_24(中断使能位置1)来使用性能计数器。由于用户没有通过 opcontrol 通知内核空间的 oprofile 完成初始化工作,一段时间后计数器溢出,触发处理程序 godson2e_pmc_handler;其中调用 oprofile_add_sample 时,由于没有初始化,下面的 entry 会为 NULL,若不加判断就会出现非法内存访问,表现为内核输出 Oops 消息后即死机。

diff -uNr src/drivers/oprofile/cpu_buffer.c pmc/drivers/oprofile/cpu_buffer.c
--- src/drivers/oprofile/cpu_buffer.c     2006-09-20 11:42:06.000000000 +0800
+++ pmc/drivers/oprofile/cpu_buffer.c     2007-04-30 15:39:23.000000000 +0800
@@ -148,6 +148,10 @@
        unsigned long pc, unsigned long event)
{
    struct op_sample * entry = &cpu_buf->buffer[cpu_buf->head_pos];
+
+     if(!entry)
+         return;
+
    entry->eip = pc;
    entry->event = event;
    increment_head(cpu_buf);



2. 用户空间工具集 patch


用户空间工具与具体的体系结构关系并不密切,因此移植起来要容易很多。

用户空间工具的用途主要在于CPU计数器事件的设置、取样数据的保存、数据的分析等。与具体体系结构相关的就是CPU计数事件的设置,因为不同的CPU,其计数事件的定义是不一样的。

Oprofile 用户空间工具安装后,在 /usr/share/oprofile/ 目录下保存有它所支持的所有体系结构CPU的计数事件文件。当其需要设置计数事件时,首先读取 /dev/oprofile/cpu_type 中的字符串,比如 Pentium IV 为 "i386/p4",则 oprofile 就以 /usr/share/oprofile/i386/p4/ 下的 events 文件作为当前平台所用之计数事件文件。

因此对龙芯2E 的支持,需要在 /usr/share/oprofile/ 下加入龙芯2E 计数事件文件,具体的位于哪个目录,则要与上面内核空间导出的 /dev/oprofile/cpu_type值相一致。

因为上面在 op_model_godson2e.c 的结构体op_model_godson2e_ops 里已经添加龙芯2E 的cpu_type 字符串为: mips/godson2e,则须在源码目录的 events/mips/ 下添加 godson2e/events 和 godson2e/unit_masks。

下面是最主要的 events 文件,具体事件描述参见2E用户手册:

diff -Nru oprofile-0.9.2/events/mips/godson2e/events oprofile-0.9.2-godson/events/mips/godson2e/events
--- oprofile-0.9.2/events/mips/godson2e/events     1970-01-01 08:00:00.000000000 +0800
+++ oprofile-0.9.2-godson/events/mips/godson2e/events     2007-04-23 16:17:10.000000000 +0800
@@ -0,0 +1,38 @@
+#
+# GODSON2E events
+#
+# The same event numbers mean different things on the two counters
+#
+event:0x00 counters:0 um:zero minimum:500 name:CYCLES : Cycles
+event:0x01 counters:0 um:zero minimum:500 name:BRANCH_NUMBER : Branch instruction number
+event:0x02 counters:0 um:zero minimum:500 name:JR_NUMBER : JR instruction number
+event:0x03 counters:0 um:zero minimum:500 name:JR31_NUMBER : JR 31 number
+event:0x04 counters:0 um:zero minimum:500 name:ICACHE_MISSES : Instruction Cache misses number
+event:0x05 counters:0 um:zero minimum:500 name:ALU1_ISSUED : ALU1 unit instruction count
+event:0x06 counters:0 um:zero minimum:500 name:MEM_ISSUED : MEM unit instruction count
+event:0x07 counters:0 um:zero minimum:500 name:FALU1_ISSUED : FALU1 unit instruction count
+event:0x08 counters:0 um:zero minimum:500 name:BHT : BHT instruction count
+event:0x09 counters:0 um:zero minimum:500 name:MEM_READ : memory read count
+event:0x0a counters:0 um:zero minimum:500 name:FIX_QUEUE_FULL : Fix queue full count
+event:0x0b counters:0 um:zero minimum:500 name:REORDER_QUEUE_FULL : Reorder queue full count
+event:0x0c counters:0 um:zero minimum:500 name:CP0_QUEUE_FULL : CP0 queue full count
+event:0x0d counters:0 um:zero minimum:500 name:TLB_REFILL : TLB refill count
+event:0x0e counters:0 um:zero minimum:500 name:INTERRUPT : Interrupt count
+event:0x0f counters:0 um:zero minimum:500 name:INTERNAL_EXCEPTION : Internal exception count
+
+event:0x00 counters:1 um:zero minimum:500 name:COMMITTED_ISSUED : Committed instruction count
+event:0x01 counters:1 um:zero minimum:500 name:BRANCHES_MISPREDICTED : Branches mispredicted count
+event:0x02 counters:1 um:zero minimum:500 name:JR_MISPREDICTED : JR mispredicted count
+event:0x03 counters:1 um:zero minimum:500 name:JR31_MISPREDICTED : JR 31 mispredicted count
+event:0x04 counters:1 um:zero minimum:500 name:DCACHE_MISSES : Data cache misses count
+event:0x05 counters:1 um:zero minimum:500 name:ALU2_ISSUED : ALU2 unit instructions count
+event:0x06 counters:1 um:zero minimum:500 name:FALU2_ISSUED : FALU2 unit instruction count
+event:0x07 counters:1 um:zero minimum:500 name:UNCACHE : Uncache operation count
+event:0x08 counters:1 um:zero minimum:500 name:BHT_MISSES : BHT misses
+event:0x09 counters:1 um:zero minimum:500 name:STORE_ISSUED : Store operation count
+event:0x0a counters:1 um:zero minimum:500 name:FLOAT_POINTING_QUEUE_FULL : Floatpointing queue full count
+event:0x0b counters:1 um:zero minimum:500 name:BRANCH_QUEUE_FULL : Branch queue full count
+event:0x0c counters:1 um:zero minimum:500 name:ITLB_MISSES : Instruction tlb misses count
+event:0x0d counters:1 um:zero minimum:500 name:EXCEPTION : Total exception count
+event:0x0e counters:1 um:zero minimum:500 name:LOAD_SPECULATION : Load speculation count
+event:0x0f counters:1 um:zero minimum:500 name:CP0_FORWARD : CP0 forward count


其他细微处的修改,参见具体的patch 文件: http://people.openrays.org/~comcat/patch/oprofile-0.9.2-godson.patch



评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值