xx项目fulldump无法生成分析

一 背景

MTK平台的项目非user版本触发panic都会生成fulldump,xx项目遇到无法生成fulldump问题,特以此文章来阐述相关分析流程。

二 原理

fulldump的生成大致流程如下

boot_mode_select函数
void boot_mode_select(void)
{
        int factory_forbidden = 0;
        //  int forbid_mode;
        /*We put conditions here to filer some cases that can not do key detection*/
        extern int kedump_mini(void) __attribute__((weak));
        if (kedump_mini) {
                if (kedump_mini()) {
                        mrdump_check(); =》here,这里面如果能执行mrdump的操作就直接操作了,如果不支持就直接往后继续执行,所以不管支不支持都往下走
                        if (g_boot_mode == FASTBOOT)
                                return;
#ifdef MTK_PMIC_FULL_RESET
                        dprintf(CRITICAL, "kedump:full pmic reset!\n"); =》这个打印很常见,就是dump执行完成后打印的,准备做一个pmic的full reset,就是全部的重启
                        mtk_arch_full_reset(); =》这里调用重启,全部下电,冷重启
#else
                        dprintf(CRITICAL, "kedump:sw reset!\n");
                        mtk_arch_reset(1);
#endif
                        return;
                }
        }
【扩展下】
################
mtk_arch_full_reset对应的log信息:做的事就是先保存pl/lk log,然后真正执行cold reset相关的寄存器设置操作
[1542] LK_LOG_STORE: start save pllk log.
[1543] LK_LOG_STORE: part_size 41943040.
[1543] LK_LOG_STORE: size <= emmc_remain_buf_size write emmc.
[1545] LK_LOG_STORE: expdb_write offset 40988672 size 75652 ret value = 75652
[1546] LK_LOG_STORE: config write emmc.
[1546] LK_LOG_STORE: expdb_write offset 41939056 size 12 ret value = 12
[1547] LK_LOG_STORE: config re-write emmc.
[1548] LK_LOG_STORE: expdb_write offset 41938944 size 52 ret value = 52
[1549] LK_LOG_STORE: save pllk log size 0x12784, offset 0x117000.
[1550] [WDT] mtk_arch_full_reset
[1550] mt6360_pmic_enable_poweroff_sequence: en = 1
[1551] mt6360_pmic_check_hw_id: ID:48 MT6877 LP5
[1552] mt6360_pmic_write_byte: I2CW[0x07] = 0x06
[1553] mt6360_pmic_check_hw_id: ID:48 MT6877 LP5
[1553] mt6360_pmic_write_byte: I2CW[0x08] = 0x04
[1554] mt6360_pmic_check_hw_id: ID:48 MT6877 LP5
[1555] mt6360_pmic_write_byte: I2CW[0x09] = 0x00
[1556] mt6360_pmic_check_hw_id: ID:48 MT6877 LP5
[1557] mt6360_pmic_write_byte: I2CW[0x0A] = 0x02
对应的code信息:
void mtk_arch_full_reset(void)
{
#ifdef MTK_PMIC_FULL_RESET
        /* save pl lk log to expdb before pmic full reset*/
        save_pllk_log();
        WDTCRI("mtk_arch_full_reset\n");
        /* PMIC full reset will happen (reset immediately) inside. */
        pmic_cold_reset();
        while (1);
#else
        WDTCRI("mtk_arch_full_reset: PMIC full reset is not supported.\n");
#endif
}
void pmic_cold_reset(void)
{
#if EXT_BUCK_MT6315
        mt6315_all_seq_off(1);
#endif
#ifdef MTK_5G_B_MT6360_MT6315
        mt6360_pmic_enable_poweroff_sequence(true);
#else
        mtk_subpmic_enable_poweroff_sequence(true);
#endif
        pmic_set_register_value(PMIC_RG_CRST, 1);
}
################
#ifdef MTK_PMIC_FULL_RESET
        if (current_is_abnormal_boot()) {
                dprintf(CRITICAL, "abnormal boot but already dumped, full pmic reset\n");
                mtk_arch_full_reset();
        }
#endif
        if (meta_detection()) {
                return;
        }
kedump_mini函数:进入boot_mode_select后,都会先执行kedump_mini操作,下面进入该函数内部看看
正常log:
kedump mini start
kedump: current time: [2010/1/1 0:0:7]
kedump: ddr reserve mode disabled
[771] atf ram dump address hi:0x0, adress lo:0x0, size: 0
[772] atf ram dump address:0x0, size: 0
[773] ATF: LAST BUFF
[773] atf_log_buf_addr:0xbfe14000, atf_log_buf_size:131072, atf_crash_flag addr:0xbfe0002c, atf_log_type:0x41544641
[774] plat_atf_log_get:0x4824f71d, plat_atf_crash_get:0x0
kedump: lkdump debug init ok
kedump: boot_reason(4)
RAM_CONSOLE. boot_arg(PL2LK): sram_addr:0x11d000, sram_size:0x800, def_type:0x1, memory_info_offset:0xec0
RAM_CONSOLE. start: 0x11d000, size: 0x800
RAM_CONSOLE. lk last status: 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 
RAM_CONSOLE. wdt_status 0x0, fiq_step 0x0, exp_type 0x0
RAM_CONSOLE. set reboot reason info done
kedump: last is not full pmic reset!
kedump: partiton 8[1e08000 - 2500000]
/* Dump KE infomation to expdb */
/*  1: has expception, 0: has no exception */
int kedump_mini(void)
{
        const char *status;
        struct rtc_time tm;
        if (!aee_check_enable()) {
                LOG("aee: disable\n");
                return 0;
        }
        if (!read_kedump_config()) {
                LOG("kedump: disable\n");
                return 0;
        }
        status = smart_reset_check();
        if (status != NULL)
                LOG("%s\n", status);
        status = mtk_wdt_get_last_stage();
        if (status != NULL)
                LOG("%s\n", status);
        LOG("kedump mini start\n");
        rtc_get_time(&tm);
        LOG("kedump: current time: [%d/%d/%d %d:%d:%d]\n", tm.tm_year, tm.tm_mon, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec);
        if (g_boot_arg) {
                if (!g_boot_arg->ddr_reserve_enable)
                        LOG("kedump: ddr reserve mode disabled\n"); =》一般开机都会有这样的打印,即正常开机或者mrdump没有开启时,都会是ddr reserve mode disabled
                else
                        LOG("kedump: ddr reserve mode enabled\n");
                if (!g_boot_arg->ddr_reserve_success)
                        LOG("kedump: ddr reserve mode failed\n");
        } else {
                LOG("kedump: null boot arg pointer error\n");
        }
        if (lkdump_debug_init())
                LOG("kedump: lkdump debug init ok\n");
        else
                LOG("kedump: lkdump debug not ready\n");
        if (kedump_skip()) =》正常情况下可能这边就直接skip了,看下它的实现
                return 0;
        if (kedump_avail())
=>这里主要是对应dump的地址、size描述
kedump: address:0x49100000, page size:0x1000
vmalloc_start:0xffffff8008000000, vmalloc_end:0xffffffbebfff0000,master_page_table:0x425de000, high_memory:0xffffffd8ffffe000
kedump: dram range 0x40000000 - 0x240000000
kedump: partiton 8[1e08000 - 2500000]
kedump: offset:0x0, data:0x4847c0d8, size:0x1030
kedump: block size:0x1000
kedump: misc data 800@11d000+0
kedump: offset:0x2000, data:0x11d000, size:0x800
kedump: crc = 0x5fa2e57e
kedump add: SYS_RAMCONSOLE_RAW[0] 800/800@2000
kedump: offset:0x30, data:0x4847c108, size:0x40
kedump: misc data e0000@48090000+0
kedump: offset:0x2800, data:0x48090000, size:0xe0000
kedump: crc = 0x7a98aed6
kedump add: SYS_PSTORE_RAW[1] e0000/e0000@2800
kedump: offset:0x70, data:0x4847c148, size:0x40
kedump: offset:0xe2800, data:0x48358030, size:0x8
kedump add: KEDUMP_CRC[2] 8/8@e2800
kedump: offset:0xb0, data:0x4847c188, size:0x40
kedump: KEHeader 0x48170000
kedump: read header 0x0x48170000[0x7f454c46]
                return 0;
        g_mcb = aee_mrdump_get_info(); =》这里主要是判断mrdump相关信息,是否匹配等
#define MRDUMP_GO_DUMP "MRDUMP11" =》有一个sig的判断
        show_info(g_mcb); =》打印一些相关信息
vmalloc_start:0xffffff8008000000, vmalloc_end:0xffffffbebfff0000,master_page_table:0x425de000, high_memory:0xffffffd8ffffe000 这个就是这里打印的
        kedump_get_dram_range();
kedump: dram range 0x40000000 - 0x240000000 =》打印dram的range 范围1g ~9g
        kedump_to_expdb(); =》这边就是将minidump数据写到expdb分区的操作,具体的有时间再看
        kedump_done(); =》最后的一个收尾动作,对cache里面的数据做一个clean的操作,结束
        LOG("kedump mini done\n");
        return 1;
}
kedump_skip函数:
static int kedump_skip(void)
{
        unsigned int boot_reason = g_boot_arg->boot_reason;
        static int kedump_dumped = 0;
        LOG("kedump: boot_reason(%d)\n", boot_reason);
        ram_console_init();
        /* this flow should be executed once only */
        if (kedump_dumped == 0) {
                kedump_dumped = 1;
                if (ram_console_is_abnormal_boot()) =》这里会判断启动是否是一次abnormal boot
                        return 0;
        }
##############################
异常log打印:
RAM_CONSOLE. boot_arg(PL2LK): sram_addr:0x11d000, sram_size:0x800, def_type:0x1, memory_info_offset:0xec0
RAM_CONSOLE. start: 0x11d000, size: 0x800
RAM_CONSOLE. lk last status: 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 
RAM_CONSOLE. wdt_status 0x2, fiq_step 0x47, exp_type 0x2 =>主要是读取wdt的状态来判断是否是abnormal reboot,主要是读取reboot_reason_pl结构体数据
RAM_CONSOLE. set reboot reason info done
RAM_CONSOLE detect abnormal boot exp_type:0x2 =》可以看到检测到了abnormal
正常log打印:
RAM_CONSOLE. boot_arg(PL2LK): sram_addr:0x11d000, sram_size:0x800, def_type:0x1, memory_info_offset:0xec0
RAM_CONSOLE. start: 0x11d000, size: 0x800
RAM_CONSOLE. lk last status: 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 
RAM_CONSOLE. wdt_status 0x0, fiq_step 0x0, exp_type 0x0
RAM_CONSOLE. set reboot reason info done
############################
#ifdef MTK_PMIC_FULL_RESET
        //if this reboot is full pmic reset, then restore the memory of ram_console and pstore
        if (ram_console_reboot_by_cold_reset()) { =》这里会判断上一次重启是否是cold reset,如果是cold reset就直接进入,返回,也就是说出现异常需要dump的时候都是warm reset
                LOG("kedump: last is full pmic reset!\n");
                kedump_restore_mem();
                return 1;
        } else {
                LOG("kedump: last is not full pmic reset!\n");
        }
#endif
        // for power lost or reboot before KE DB collected scenario
        kedump_restore_mem();
        return 1;
}
mrdump_check函数
static int mrdump_check(void)
{
        if (mrdump_detection()) { =》因为这个函数check返回的是0,所以走的else
                mt65xx_backlight_on();
                if (g_boot_mode == FASTBOOT)
                        return 1;
                mrdump_run2();
                return 1;
        } else {
                return 0; =》这里return 0了
        }
}
mrdump_detection函数
int mrdump_detection(void)
{
        if (aee_check_enable() != AEE_ENABLE_FULL)
                return 0;
        mrdump_key_secure_enable();
        output_device = mrdump_get_default_output_device();
        if (output_device == MRDUMP_DEV_UNKNOWN) {
                return 0;
        }
        if (!ram_console_is_abnormal_boot()) {
                dprintf(CRITICAL, "MT-RAMDUMP: No exception detected, skipped\n");
                return 0;
        }
        mrdump_cb = aee_mrdump_get_params();
        if (mrdump_cb == NULL) {
                dprintf(CRITICAL, "MT-RAMDUMP control block not found\n");
                return 0;
        }
        memset(&cblock_result, 0, sizeof(struct mrdump_cblock_result));
        log_size = 0;
        strlcpy(cblock_result.sig, MRDUMP_GO_DUMP, sizeof(cblock_result.sig));
        uint8_t reboot_mode = mrdump_cb->crash_record.reboot_mode;
        if (!g_boot_arg->ddr_reserve_enable) {
                voprintf_debug("DDR reserve mode disabled\n");
                mrdump_status_none("DDR reserve mode disabled\n");
                goto error;
        }
        if (!g_boot_arg->ddr_reserve_success) {
                voprintf_debug("DDR reserve mode failed\n");
                mrdump_status_none("DDR reserve mode failed\n"); =》问题log在这里直接跳到error
                goto error;
        }
..........
error:
        mrdump_write_result();
        return 0; =》这里直接返回了
以上流程到此结束。接下来查找问题点:ddr reserve mode disable是为什么?先看g_boot_arg->ddr_reserve_enable的传递实现

lk的arg一般都是从preloader里面传过来的,搜索下关键词,看看什么时候是1使能状态

junfang1@buildsrv-n43:~/work/0_code/s0-trank-user/vendor/mediatek/proprietary/bootable/bootloader/preloader/platform/mt6877$ grep -rn "ddr_reserve_enable"
src/core/main.c:419:extern u32 g_ddr_reserve_enable;
src/core/main.c:582:         (g_ddr_reserve_enable==1 && g_ddr_reserve_success==1) ? "with" : "without",
src/drivers/dramc_top.c:333:extern u32 g_ddr_reserve_enable;
src/drivers/dramc_top.c:1027:           if(g_ddr_reserve_enable==1 && g_ddr_reserve_success==1) {
src/drivers/dramc_top.c:1081:           g_ddr_reserve_enable = 1;
src/drivers/dramc_top.c:1126:           g_ddr_reserve_enable = 0;
void check_ddr_reserve_status(void)
{
        /* get status of DCS and DVFSRC */
        int dvfsrc_success = drm_is_dvfsrc_success();
        int dvfsrc_en = drm_is_dvfsrc_enable();
#ifdef DDR_RESERVE_MODE
        int counter = TIMEOUT;
        if(drm_is_reserve_ddr_enabled()) {
                g_ddr_reserve_enable = 1;
#ifdef LAST_DRAMC
                dram_fatal_set_ddr_rsv_mode_flow();
#endif
                if(drm_is_reserve_ddr_mode_success()) { =》这里最后是出问题的
                        while(counter) {
                                if(drm_is_dram_slf()) {
                                        g_ddr_reserve_success = 1;
                                        break;
                                }
                                counter--;
                        }
                        if(counter == 0) {
                                dramc_crit("[DDR Reserve] ddr reserve mode success but DRAM not in self-refresh!\n");
                                g_ddr_reserve_success = 0;
#ifdef LAST_DRAMC
                                dram_fatal_set_ddr_rsv_mode_err();
#endif
                        }
                } else {
                        dramc_crit("[DDR Reserve] ddr reserve mode FAIL!\n"); =》这里出来了
                        drm_dram_reserved(0);
                        g_ddr_reserve_success = 0;
#ifdef LAST_DRAMC
                        dram_fatal_set_ddr_rsv_mode_err();
#endif
                }
                /* overwrite g_ddr_reserve_success if dvfsrc failed */
                if (dvfsrc_en == 1 && dvfsrc_success == 0) {
                        dramc_crit("[DDR Reserve] DVFSRC fail!\n");
                        drm_dram_reserved(0);
                        g_ddr_reserve_success = 0;
#if 0//def LAST_DRAMC
                        dram_fatal_set_dvfsrc_err();
#endif
                } else {
                        dramc_crit("[DDR Reserve] DVFSRC success! (dvfsrc_en=%d)\n", dvfsrc_en);
                }
                /* release dram, no matter success or failed */
                release_dram();
        } else {
                dramc_crit("[DDR Reserve] ddr reserve mode not be enabled yet\n");
                g_ddr_reserve_enable = 0;
                #ifdef LAST_DRAMC
                enable_dramc_clk();
                #endif
        }
#else
#ifdef LAST_DRAMC
        enable_dramc_clk();
#endif
#endif

SMART RESET: FALSE
rst from: pl =》pl阶段发生了异常,导致ddr reserve mode被disable,所以应该是pl异常重启了
kedump mini start
kedump: current time: [2022/9/8 2:41:12]
kedump: ddr reserve mode disabled
再往前看可疑log:
dvfsrc_opp_level_mapping: OP_TYPE=1, VMODE=0, RSV4=100000
dvfsrc_opp_level_mapping: FINAL vcore_opp_uv: 750000, 725000, 650000, 600000, 550000
#T#dvfsrc_opp_level_mapping=3
[DDR Reserve] ddr reserve mode FAIL! =》这里failed了
[DDR Reserve] DVFSRC success! (dvfsrc_en=1)
[MT6359] 1 6,56
[MT6315]1 16,80
[MT6315]1 16,80
[MT6359] cnvdata 1800000, n_size 3
[MT6359] 1 9,12
@ DDR reserve CH0 last_dramc[0x00000000]
@ GATING_ERROR[0]
@ RD_TEST_DM_CMP_CPT2_RK0[0]
@ RD_TEST_DM_CMP_CPT2_RK1[0]
@ RD_TEST_DM_CMP_ERR2_RK0[0] <-Ignore (w/o test)
@ RD_TEST_DM_CMP_ERR2_RK1[0] <-Ignore (w/o test)
@ RD_TEST_DLE_CNT_OK2_RK0[0]
@ RD_TEST_DLE_CNT_OK2_RK1[0]
@ RDWR_TEST_DM_CMP_CPT2_RK0[0]
@ RDWR_TEST_DM_CMP_CPT2_RK1[0]
@ RDWR_TEST_DM_CMP_ERR2_RK0[0]
@ RDWR_TEST_DM_CMP_ERR2_RK1[0]
@ RDWR_TEST_DLE_CNT_OK2_RK0[0]
@ RDWR_TEST_DLE_CNT_OK2_RK1[0]
@ Check DRAMC enter Self-Refresh-> MISC_STATUSA_SREF_STATE=[0x1]
@ DDR reserve CH1 last_dramc[0x00000000]
@ GATING_ERROR[0]
@ RD_TEST_DM_CMP_CPT2_RK0[0]
@ RD_TEST_DM_CMP_CPT2_RK1[0]
@ RD_TEST_DM_CMP_ERR2_RK0[0] <-Ignore (w/o test)
@ RD_TEST_DM_CMP_ERR2_RK1[0] <-Ignore (w/o test)
@ RD_TEST_DLE_CNT_OK2_RK0[0]
@ RD_TEST_DLE_CNT_OK2_RK1[0]
@ RDWR_TEST_DM_CMP_CPT2_RK0[0]
@ RDWR_TEST_DM_CMP_CPT2_RK1[0]
@ RDWR_TEST_DM_CMP_ERR2_RK0[0]
@ RDWR_TEST_DM_CMP_ERR2_RK1[0]
@ RDWR_TEST_DLE_CNT_OK2_RK0[0]
@ RDWR_TEST_DLE_CNT_OK2_RK1[0]
@ Check DRAMC enter Self-Refresh-> MISC_STATUSA_SREF_STATE=[0x1]
[USB] Switch to UART mode!!
其实原因就是触发异常后,准备进入lk抓取dump的时候,在pl阶段发生异常并重启,异常log所在位置:848行~1449行之间
848: [USB] Switch to UART mode!!
1449: [USB] Switch to UART mode!!

对比正常的时间log如下,就是该区间段存在问题:
[DOE_ENV]get_env dvfs_v_mode
[DOE_ENV]get_env vcore
dvfsrc: INFO2: 0x0
is_rising_need: PTPOD: 0x749
is_rising_need: PTPOD: 0x749
dvfsrc_opp_level_mapping: OP_TYPE=1, VMODE=0, RSV4=100000
dvfsrc_opp_level_mapping: FINAL vcore_opp_uv: 750000, 725000, 650000, 600000, 550000
#T#dvfsrc_opp_level_mapping=3
[DDR Reserve] DVFSRC success! (dvfsrc_en=1)
[MT6359] 1 6,56
[MT6315]1 16,80
[MT6315]1 16,80
[MT6359] cnvdata 1800000, n_size 3
[MT6359] 1 9,12
@ DDR reserve CH0 last_dramc[0x0000333F]
@ GATING_ERROR[0]
@ RD_TEST_DM_CMP_CPT2_RK0[1]
@ RD_TEST_DM_CMP_CPT2_RK1[1]
@ RD_TEST_DM_CMP_ERR2_RK0[1] <-Ignore (w/o test)
@ RD_TEST_DM_CMP_ERR2_RK1[1] <-Ignore (w/o test)
@ RD_TEST_DLE_CNT_OK2_RK0[1]
@ RD_TEST_DLE_CNT_OK2_RK1[1]
@ RDWR_TEST_DM_CMP_CPT2_RK0[1]
@ RDWR_TEST_DM_CMP_CPT2_RK1[1]
@ RDWR_TEST_DM_CMP_ERR2_RK0[0]
@ RDWR_TEST_DM_CMP_ERR2_RK1[0]
@ RDWR_TEST_DLE_CNT_OK2_RK0[1]
@ RDWR_TEST_DLE_CNT_OK2_RK1[1]
@ Check DRAMC enter Self-Refresh-> MISC_STATUSA_SREF_STATE=[0x1]
@ DDR reserve CH1 last_dramc[0x0000333F]
@ GATING_ERROR[0]
@ RD_TEST_DM_CMP_CPT2_RK0[1]
@ RD_TEST_DM_CMP_CPT2_RK1[1]
@ RD_TEST_DM_CMP_ERR2_RK0[1] <-Ignore (w/o test)
@ RD_TEST_DM_CMP_ERR2_RK1[1] <-Ignore (w/o test)
@ RD_TEST_DLE_CNT_OK2_RK0[1]
@ RD_TEST_DLE_CNT_OK2_RK1[1]
@ RDWR_TEST_DM_CMP_CPT2_RK0[1]
@ RDWR_TEST_DM_CMP_CPT2_RK1[1]
@ RDWR_TEST_DM_CMP_ERR2_RK0[0]
@ RDWR_TEST_DM_CMP_ERR2_RK1[0]
@ RDWR_TEST_DLE_CNT_OK2_RK0[1]
@ RDWR_TEST_DLE_CNT_OK2_RK1[1]
@ Check DRAMC enter Self-Refresh-> MISC_STATUSA_SREF_STATE=[0x1]
ALL DRAM CHAN is not in self-refresh
[DDR Reserve] release dram from self-refresh PASS!
[DDR reserve] EMI CEN CONA: F053F154
[DDR reserve] EMI CHN CONA: 488FC51
#T#chk DDR Reserve status=21

出错函数:
int drm_is_reserve_ddr_mode_success(void)

{
        unsigned int drm_dbg_ctrl;
       /*
         * MTK_DRM_DDR_RESERVE_RTA bit will be reset by modifying register MODE.
         * Read DEBUG_CTL value kept by mtk_drm_get_debug_ctl().
         */
        drm_dbg_ctrl = mtk_drm_get_debug_ctl();
=》
#define MTK_DRM_DDR_RESERVE_STA                (0x00100000)
#define MTK_DRM_BASE                (0x1000D000)
#define MTK_DRM_DEBUG_CTL        (MTK_DRM_BASE+0x0030) =》读这个位置的值,要和MTK_DRM_DDR_RESERVE_STA匹配,才会DDR reserved success,这里 应该是kernel里面设置的
        if (drm_dbg_ctrl & MTK_DRM_DDR_RESERVE_STA) {
                printf("DRM DDR reserve mode success! %x\n", drm_dbg_ctrl);
                return 1;
        } else {
                printf("DRM DDR reserve mode FAIL! %x\n", drm_dbg_ctrl);
                return 0;
        }
}

三 分析思路

fulldump的生成原理其实和minidump类似,都是在lk阶段dump数据到设备分区里面,不同的是,fulldump会dump完整的内存信息,在minidump之后继续执行,所以整个关键流程都是lk,因此问题分析主要依赖串口log。

3.1 串口log

看fulldump的生成过程主要看mini dump完成之后,关键字就是:kedump mini done,下面就是准备生成fulldump的时候异常了

kedump: data:0x508425dc, size:0xa000, offset:0x1ce3e3d, va:0xffff0000050425dc
kedump add: ZAEE_LOG[61] a000/a000@1ce3e3d
kedump mini done
aee_check_enable:295: lk aee dcfg = full (lv.2)
mrdump_get_env:106: mrdump_output: internal-storage
aee_check_enable:295: lk aee dcfg = full (lv.2)
wait TE int 100!
wait TE int 99!
wait TE int 98!
wait TE int 97!
wait TE int 96!
wait TE int 95!
wait TE int 94!
wait TE int 93!
wait TE int 92!
wait TE int 91!
wait TE int 90!
wait TE int 89!
wait TE int 88!
wait TE int 87!
wait TE int 86!
wait TE int 85!
wait TE int 84!
wait TE int 83!
wait TE int 82!
wait TE int 81!
wait TE int 80!
wait TE int 79!
wait TE int 78!
wait TE int 77!
wait TE int 76!
wait TE int 75!
wait TE int 74!
wait TE int 73!
wait TE int 72!
wait TE int 71!
wait TE int 70!
wait TE int 69!
wait TE int 68!
[MRDUMP11] I:output_device : 3
[MRDUMP11] D:Boot record found at 0xffff000000043000[5852]
data fault: PC at 0xffff000050729eb0, FAR 0xffff000104903da0, iss 0x45
ESR 0x96000045: ec 0x25, il 0x2000000, iss 0x45
iframe 0xffff0000508f49f0:
x0  0xffff00005077a228 x1  0xffff00005077a228 x2  0x        ff000000 x3  0x            ce48
x4  0x               0 x5  0x               0 x6  0x              25 x7  0x              5e
x8  0xffff0000508f6180 x9  0xffff0001048d0480 x10 0x               0 x11 0xffff00005077a228
x12 0x          a0a0a0 x13 0x               0 x14 0xffff00005077a978 x15 0x              17
x16 0x              16 x17 0x              15 x18 0x              13 x19 0x              65
x20 0xffff000050855000 x21 0xffff000050855000 x22 0xffff000050855000 x23 0x              8f
x24 0xffff000050855000 x25 0x               0 x26 0x               0 x27 0x               0
x28 0x               0 x29 0xffff0000508f4b00 lr  0xffff000050729aec usp 0xfe0689b3722bd01d
elr 0xffff000050729eb0
spsr 0x        82000305
panic (caller 0xffff00005070125c): die
HALT: action = 0, reason = 9
logstore_shutdown_callback done
mt6315_shutdown_callback done
mt6315_shutdown_callback done
mt6373_shutdown_callback done
mtk_dbgtop_shutdown_callback done
logstore_sync_callback done
reboot_reason_shutdown_callback done
HALT: spinning forever..., ret = 0

3.2 log解析

看到上面出现panic,自然想到需要看看panic的具体code位置,5.10项目,mtk针对lk有三类elf文件,我们定位问题的时候主要依赖AEE_OBJ里面的,通过addr2line定位具体代码行

junfang1@buildsrv-n43:~/work/0_code/s0-trank-user/out_vnd/target/product/ad10_h832/obj$ find . -name "lk.elf"
./AEE_OBJ/build-ad10_h832/lk.elf
./BL2_EXT_OBJ/build-ad10_h832/lk.elf
./LK_OBJ/build-ad10_h832/lk.elf
junfang1@buildsrv-n43:~/work/0_code/s0-trank-user/out_vnd/target/product/ad10_h832/obj$ aarch64-linux-android-addr2line -e ./AEE_OBJ/build-ad10_h832/lk.elf -a 0xffff000050729eb0
0xffff000050729eb0
/work/junfang1/work/0_code/s0-trank-user/vendor/mediatek/proprietary/bootable/bootloader/lk2/platform/mediatek/common/video/mtk_cfb.c:396

可以看到:cfb_dchars函数里面出现异常

static void cfb_dchars(int x, int y, unsigned char *s, int count)
{
    unsigned char *pos = NULL;
    unsigned char *tdest = NULL;
    unsigned char *pdest = NULL;
    unsigned char *pfont = NULL;
    unsigned int data_fmt = 0;
    unsigned int row = 0;
    unsigned int offs = 0;
    unsigned char high_bits = 0;
    unsigned char low_bits = 0;

    pdest = cfb_fb_addr + y * LINE_SIZE + x * PIXEL_SIZE;
    data_fmt = DATA_FMT;

    pfont = mtk_vdo_fntdata;
    switch (data_fmt) {
    case CFB_555RGB_15BIT:
        while (count--) {
            offs = (*s++) * MTK_VFH;
            pos = pfont + offs;
            row = MTK_VFH;
            for (tdest = pdest; row--; tdest += LINE_SIZE) {
                unsigned char bits = *pos++;

                ((uint32_t *)tdest)[0] =
                    SHTSWAP32((cfb_font_dtable15[bits >> 6] & cfb_eorx) ^ cfb_bgx);
                ((uint32_t *)tdest)[1] =
                    SHTSWAP32((cfb_font_dtable15[bits >> 4 & 3] & cfb_eorx) ^ cfb_bgx);
                ((uint32_t *)tdest)[2] =
                    SHTSWAP32((cfb_font_dtable15[bits >> 2 & 3] & cfb_eorx) ^ cfb_bgx);
                ((uint32_t *)tdest)[3] =
                    SHTSWAP32((cfb_font_dtable15[bits & 3] & cfb_eorx) ^ cfb_bgx);
            }
            pdest = pdest + MTK_VFW * PIXEL_SIZE;
        }
        break;
    case CFB_565RGB_16BIT:
        while (count--) {
            offs = (*s++) * MTK_VFH;
            pos = pfont + offs;
            row = MTK_VFH;
            if (!strncmp(MTK_LCM_PHYSICAL_ROTATION, "180", 3)) {
                for (tdest = pdest + row * LINE_SIZE; row--; tdest -= LINE_SIZE) {
                    unsigned char bits = *pos++;

                    ((uint32_t *)tdest)[0] =
                        SWAP32_16((lk_cfb_font_dtable16[bits & 3] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[1] =
                        SWAP32_16((lk_cfb_font_dtable16[bits >> 2 & 3] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[2] =
                        SWAP32_16((lk_cfb_font_dtable16[bits >> 4 & 3] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[3] =
                        SWAP32_16((lk_cfb_font_dtable16[bits >> 6] & cfb_eorx) ^ cfb_bgx);
                }
                pdest = pdest + MTK_VFW * PIXEL_SIZE;
            } else {
                for (tdest = pdest; row--; tdest += LINE_SIZE) {
                    unsigned char bits = *pos++;

                    ((uint32_t *)tdest)[0] =
                        SHTSWAP32((lk_cfb_font_dtable16[bits >> 6] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[1] =
                        SHTSWAP32((lk_cfb_font_dtable16[bits >> 4 & 3] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[2] =
                        SHTSWAP32((lk_cfb_font_dtable16[bits >> 2 & 3] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[3] =
                        SHTSWAP32((lk_cfb_font_dtable16[bits & 3] & cfb_eorx) ^ cfb_bgx);
                }
                pdest = pdest + MTK_VFW * PIXEL_SIZE;
            }
        }
        break;
    case CFB_332RGB_8BIT:
    case CFB_FMT_8BIT:
        while (count--) {
            offs = (*s++) * MTK_VFH;
            pos = pfont + offs;
            row = MTK_VFH;
            for (tdest = pdest; row--; tdest += LINE_SIZE) {
                high_bits = *pos >> 4;
                low_bits = *pos & 15;
                ++pos;
                ((vaddr_t *)tdest)[0] =
                    (cfb_font_dtable8[high_bits] & cfb_eorx) ^ cfb_bgx;
                ((vaddr_t *)tdest)[1] =
                    (cfb_font_dtable8[low_bits] & cfb_eorx) ^ cfb_bgx;
            }
            pdest = pdest + MTK_VFW * PIXEL_SIZE;
        }
        break;
    case CFB_888RGB_24BIT:
        while (count--) {
            offs = (*s++) * MTK_VFH;
            pos = pfont + offs;
            row = MTK_VFH;
            for (tdest = pdest; row--; tdest += LINE_SIZE) {
                high_bits = *pos >> 4;
                low_bits = *pos & 15;
                ++pos;
                ((uint32_t *)tdest)[0] =
                    (lk_cfb_font_dtb24[high_bits][0] & cfb_eorx) ^ cfb_bgx;
                ((uint32_t *)tdest)[1] =
                    (lk_cfb_font_dtb24[high_bits][1] & cfb_eorx) ^ cfb_bgx;
                ((uint32_t *)tdest)[2] =
                    (lk_cfb_font_dtb24[high_bits][2] & cfb_eorx) ^ cfb_bgx;
                ((uint32_t *)tdest)[3] =
                    (lk_cfb_font_dtb24[low_bits][0] & cfb_eorx) ^ cfb_bgx;
                ((uint32_t *)tdest)[4] =
                    (lk_cfb_font_dtb24[low_bits][1] & cfb_eorx) ^ cfb_bgx;
                ((uint32_t *)tdest)[5] =
                    (lk_cfb_font_dtb24[low_bits][2] & cfb_eorx) ^ cfb_bgx;
            }
            pdest = pdest + MTK_VFW * PIXEL_SIZE;
        }
        break;
    case CFB_X888RGB_32BIT:
        while (count--) {
            offs = (*s++) * MTK_VFH;
            pos = pfont + offs;
            row = MTK_VFH;
            if (!strncmp(MTK_LCM_PHYSICAL_ROTATION, "90", 2)) {
                for (tdest = pdest + row * PIXEL_SIZE; row--; tdest -= PIXEL_SIZE) {
                    high_bits = *pos >> 4;
                    low_bits = *pos & 15;
                    ++pos;
                    ((uint32_t *)tdest)[0 * CFB_WIDTH + MTK_VFH * CFB_WIDTH] =
                        0xff000000 |
                        ((lk_cfb_font_dtable32[high_bits][0] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[1 * CFB_WIDTH + MTK_VFH * CFB_WIDTH] =
                        0xff000000 |
                        ((lk_cfb_font_dtable32[high_bits][1] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[2 * CFB_WIDTH + MTK_VFH * CFB_WIDTH] =
                        0xff000000 |
                        ((lk_cfb_font_dtable32[high_bits][2] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[3 * CFB_WIDTH + MTK_VFH * CFB_WIDTH] =
                        0xff000000 |
                        ((lk_cfb_font_dtable32[high_bits][3] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[4 * CFB_WIDTH + MTK_VFH * CFB_WIDTH] =
                        0xff000000 |
                        ((lk_cfb_font_dtable32[low_bits][0] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[5 * CFB_WIDTH + MTK_VFH * CFB_WIDTH] =
                        0xff000000 |
                        ((lk_cfb_font_dtable32[low_bits][1] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[6 * CFB_WIDTH + MTK_VFH * CFB_WIDTH] =
                        0xff000000 |
                        ((lk_cfb_font_dtable32[low_bits][2] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[7 * CFB_WIDTH + MTK_VFH * CFB_WIDTH] =
                        0xff000000 |
                        ((lk_cfb_font_dtable32[low_bits][3] & cfb_eorx) ^ cfb_bgx);
                }
                pdest = pdest + MTK_VFH * PIXEL_SIZE;
            } else if (!strncmp(MTK_LCM_PHYSICAL_ROTATION, "180", 3)) {
                for (tdest = pdest + row * LINE_SIZE; row--; tdest -= LINE_SIZE) {
                    high_bits = *pos >> 4;
                    low_bits = *pos & 15;
                    ++pos;
                    ((uint32_t *)tdest)[7] =
                        0xff000000 |
                        ((lk_cfb_font_dtable32[high_bits][0] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[6] =
                        0xff000000 |
                        ((lk_cfb_font_dtable32[high_bits][1] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[5] =
                        0xff000000 |
                        ((lk_cfb_font_dtable32[high_bits][2] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[4] =
                        0xff000000 |
                        ((lk_cfb_font_dtable32[high_bits][3] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[3] =
                        0xff000000 |
                        ((lk_cfb_font_dtable32[low_bits][0] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[2] =
                        0xff000000 |
                        ((lk_cfb_font_dtable32[low_bits][1] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[1] =
                        0xff000000 |
                        ((lk_cfb_font_dtable32[low_bits][2] & cfb_eorx) ^ cfb_bgx);
                    ((uint32_t *)tdest)[0] =
                        0xff000000 |
                        ((lk_cfb_font_dtable32[low_bits][3] & cfb_eorx) ^ cfb_bgx);
                }
                pdest = pdest + MTK_VFW * PIXEL_SIZE;
            } else if (!strncmp(MTK_LCM_PHYSICAL_ROTATION, "270", 3)) {
                for (tdest = pdest; row--; tdest += PIXEL_SIZE) {
                    high_bits = *pos >> 4;
                    low_bits = *pos & 15;
                    ++pos;
                    ((uint32_t *)tdest)[7 * CFB_WIDTH + MTK_VFH * CFB_WIDTH] =
                                            0xff000000 |
                        ((lk_cfb_font_dtable32[high_bits][0] & cfb_eorx) ^ cfb_bgx);
                        =》最后挂的位置

3.3 反推验证

跟踪下cfb_dchars函数调用:

video_printf->video_puts->video_putc->cfb_putchar

video_printf是一个打印函数,在lk阶段有多处调用。看起来是某个地方调用此函数时传入了不合法的参数,从而触发了panic。基于此怀疑,做如下修改

junfang1@buildsrv-n43:~/work/0_code/s0-trank-user/vendor/mediatek/proprietary/bootable/bootloader/lk2/platform/mediatek/common/video$ git diff mtk_cfb.c
diff --git a/platform/mediatek/common/video/mtk_cfb.c b/platform/mediatek/common/video/mtk_cfb.c
index 021b076db..dd39797c8 100644
--- a/platform/mediatek/common/video/mtk_cfb.c
+++ b/platform/mediatek/common/video/mtk_cfb.c
@@ -600,7 +600,7 @@ void video_printf(const char *fmt, ...)
     va_end(args);

     /* Print the string */
-    video_puts(printbuffer);
+    //video_puts(printbuffer);
 }

单编验证:
source tran_setenv.sh tran_projects/ad10/ad10_h832_a1 efuse vnd user
export OUT_DIR=out_vnd && lunch vext_ad10_h832-user
make lk -j32 2>&1 | tee build-lk.txt

触发panic,查看结果,已经可以正常生成

四 补充分析

基于上面的修复之后,发现问题仍然概率性存在,分为以下两种情况。问题1:

kedump: crc = 0xd266a225
mrdump_get_reserved_info_by_index:170: index(127) is large than mblock_reserved_num(45)
[MRDUMP11] I:userdata size: 232319 Mb
[MRDUMP11] I:Output to EXT4 Partition emmc
[MRDUMP11] E: first check
mrdump_ext4_output:313:  Pre-Allocate has no LBA markers(lbaooo=0). RAM-Dump stop!
[MRDUMP11] E:mrdump_ext4_output fail (-3)
HALT: action = 1, reason = 1
logstore_shutdown_callback done
mt6315_shutdown_callback done
mt6315_shutdown_callback done
mt6373_shutdown_callback done
mtk_dbgtop_shutdown_callback done
logstore_sync_callback done
reboot_reason_shutdown_callback done
HALT: spinning fore 

此时可以看到,代码里面对lbaooo节点有依赖,当这个节点生成以后,就可以生成完整的fulldump了。

执行 cat /sys/module/mrdump/parameters/lbaooo,观察这个值是否变成非0。这个值的目的就是按mrdump的size去申请一块空间,这块空间比较大,有兴趣的小伙伴可以研究一下。所以等这个值变成非0后,就可以得到完整的fulldump,一般大约需要1-2 min。

TECNO-AD10:/ # mrdump_tool output-get

internal-storage

这个命令可以查看我们的fulldump是存到哪里,常见的有usb输出、partition输出、internal-storage。

293 int mrdump_ext4_output(const struct mrdump_control_block *mrdump_cb,
294                        const struct kzip_addlist *memlist,
295                        const struct kzip_addlist *memlist_cmm,
296                        struct mrdump_dev *mrdump_dev)
297 {
298     uint8_t InfoLBA[MRDUMP_PAF_TOTAL_SIZE];
299     unsigned int mycrc;
300 
301     if (mrdump_dev == NULL)
302         return -BDATA_STATE_FILE_ACCESS_ERROR;
303 
304     voprintf_info("Output to EXT4 Partition %s\n", mrdump_dev->name);
305 
306     // pre-work for ext4 LBA
307     bzero(InfoLBA, sizeof(InfoLBA));
308 
309     dump_paf_info(InfoLBA, " first check\n");
310     // Error 1. InfoLBA starting address not available
311     if (mrdump_cb->output_fs_lbaooo == 0) {
312         LTRACEF_LEVEL(ALWAYS, " Pre-Allocate has no LBA markers(lbaooo=%u). RAM-Dump stop!\n",                                                                                                                                                                          
313                 mrdump_cb->output_fs_lbaooo);
314         return -BDATA_STATE_BLOCK_HEADER_ERROR;
315     }
316     if (!mrdump_dev->read(mrdump_dev, ext4_lba_to_block_offset(mrdump_cb->output_fs_lbaooo),
317             (uint8_t *)InfoLBA, sizeof(InfoLBA))) {
318         LTRACEF_LEVEL(ALWAYS, " SDCard: Reading InfoLBA failed.\n");
319         return -BDATA_STATE_FILE_ACCESS_ERROR;

问题2:

项目是折叠屏,展开的时候:
~/shell$ ./aarch64-linux-android-addr2line -Cfe ~/code/s0_trunk/out_vnd/target/product/ad10_h832/obj/AEE_OBJ/build-ad10_h832/lk.elf 0xffff000050729f8c  
/work/xx/code/s0_trunk/vendor/mediatek/proprietary/bootable/bootloader/lk2/platform/mediatek/common/video/mtk_cfb.c:396

对应的就是之前分析思路的那种情况

折叠的时候:
~/shell$ ./aarch64-linux-android-addr2line -Cfe ~/code/s0_trunk/out_vnd/target/product/ad10_h832/obj/AEE_OBJ/build-ad10_h832/lk.elf 0xffff00005074edbc
  /work/xx/code/s0_trunk/vendor/mediatek/proprietary/bootable/bootloader/lk2/lib/libc/string/memcpy.c:59

memcpy —— lk2里面调用memcpy的地方太多了,但是猜测应该是/lk2/platform/mediatek/mt6893/disp下面的调用。
lk2现在没有办法去看完整的trace。还有重要的一点:去掉app/aee下面所有的video_printf后,发现虽然折叠后副屏幕没有橙屏,但是fulldump可以生成。继续debug:

  58 static void aee_init(const struct app_descriptor *app)
 59 {
 60     LTRACEF_LEVEL(ALWAYS, "Trigger exception flow\n");
 61     /* boot args info */
 62     LTRACEF_LEVEL(ALWAYS, "boot_reason(%d) boot_mode(%d)\n",
 63                   platform_get_boot_reason(),
 64                   platform_get_boot_mode());
 65     LTRACEF_LEVEL(ALWAYS, "ddr_reserve_ready(%d) ddr_reserve_success(%d)\n",
 66                   aee_ddr_reserve_enable(), aee_ddr_reserve_success());
 67 
 68     /* map aee_debug_kinfo */
 69     if (aee_debug_kinfo_map()) {
 70         LTRACEF_LEVEL(ALWAYS, "aee_debug_kinfo map fail\n");
 71         exit_aee_init();
 72     }
 73 
 74     switch (aee_check_enable()) {
 75     case AEE_ENABLE_NO:
 76         LTRACEF_LEVEL(ALWAYS, "aee disabled but jump into aee lk?\n");
 77         exit_aee_init();
 78         break;
 79     case AEE_ENABLE_MINI:
 80         kedump_mini();
 81         exit_aee_init();
 82         break;
 83     case AEE_ENABLE_FULL:
 84         kedump_mini();
 85         switch (mrdump_detection()) {                                                                                                                                                                                                                                   
 86         case -1:
 87             exit_aee_init();
 88         case 0:
 89             /* Do nothing for fastboot loop */
 90             break;
 91         case 1:
 92             mrdump_run();
 93             exit_aee_init();
 94             break;



主要看上面两个函数,里面有video_printf的打印,这里不断加log会发现,异常的时候是video_printf被多次调用。正常来说aee_init里面会有video_printf,我们暂时把里面的这个注释掉,只看mrdump函数:
164 void mrdump_run(void)
165 {
166     int err;
167 
168 #ifdef WITH_PLATFORM_MEDIATEK_COMMON_VIDEO
169     video_set_color(0xFFFF7F00, 0xFFFF0000);//这里是我们设置橙色的地方
170     video_clear();
171     video_set_cursor(0, 0);
172 #endif
//这里加个video_printf("xxxxx");
video_printf("xx test\n");
while(1);

此时可以看到橙色的效果,但是字体的位置明显不对。这里大概就知道了:video_printf的打印超出了某个显示的size,发生溢出,所以在打印过程中出现异常。此时找display同事负责处理就好。

五 总结

fulldump常见另外几种无法生成情况:

I:Ramdump size is 0, no data to dump //lbaooo 节点还没有生成(大约需要1-2min)就发生了异常

W:Ramdump process interrupted, no data to dump//fulldump 生成过程中被中断(长按power)

E:pafile_read_info: LBA info CRC error (c:6ab6b2d5, v:00000000) // fulldump 生成后 还在temp 阶段就被重启

  • 3
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值