--------- beginning of crash
01-02 09:14:39.508 775 5091 F libc : Fatal signal 11 (SIGSEGV), code 1, fault addr 0xf4f17000 in tid 5091 (CAM_MctBus)
01-02 09:14:39.762 28465 28465 F DEBUG : *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
01-02 09:14:39.762 28465 28465 F DEBUG : Build fingerprint: 'Android/Aiolos/msm8953_64:7.1.2/Aiolos/20180811_1106:userdebug/test-keys'
01-02 09:14:39.762 28465 28465 F DEBUG : Revision: '0'
01-02 09:14:39.762 28465 28465 F DEBUG : ABI: 'arm'
01-02 09:14:39.763 28465 28465 F DEBUG : pid: 775, tid: 5091, name: CAM_MctBus >>> /system/bin/mm-qcamera-daemon <<<
01-02 09:14:39.763 28465 28465 F DEBUG : signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0xf4f17000
01-02 09:14:39.763 28465 28465 F DEBUG : r0 f4f17000 r1 00000003 r2 00008318 r3 ebdedac0
01-02 09:14:39.763 28465 28465 F DEBUG : r4 f08de1e0 r5 00000001 r6 ebdf5dd8 r7 f08de224
01-02 09:14:39.763 28465 28465 F DEBUG : r8 00000001 r9 e992d540 sl f1548800 fp 00000003
01-02 09:14:39.763 28465 28465 F DEBUG : ip f4255df4 sp ea8037e8 lr f423ed8f pc f423c166 cpsr a0070030
01-02 09:14:39.781 28465 28465 F DEBUG :
01-02 09:14:39.781 28465 28465 F DEBUG : backtrace:
01-02 09:14:39.782 28465 28465 F DEBUG : #00 pc 00006166 /system/vendor/lib/libmmcamera2_mct.so
01-02 09:14:39.782 28465 28465 F DEBUG : #01 pc 00046fe3 /system/lib/libc.so (_ZL15__pthread_startPv+22)
01-02 09:14:39.782 28465 28465 F DEBUG : #02 pc 00019ced /system/lib/libc.so (__start_thread+6)
根据kernel log
[ 2804.384904] pgd = ffffffc059f28000
[ 2804.389559] [f4f17000] *pgd=00000000d9dda003, *pud=00000000d9dda003
以及反汇编
00006166 LDR R0, [R0]
我们可以得到是 0xf4f17000 这个指针地址的取值出现了错误。
根据反汇编我们可以定位到函数的位置
代码路径
:vendor/qcom/proprietary/mm-camera/mm-camera2/media-controller/mct/controller/mct_controller.c
static boolean mct_controller_handle_SOF_proc(
mct_pipeline_t *pipeline, mct_bus_msg_t *bus_msg)
{
CLOGI(CAM_MCT_MODULE, "(sofdelay, curr_sofdelay) = (%d, %d) kptr %u",
pipeline->sof_delay, current_sof_delay, *(int *)isp_sof_bus_msg->kernel_sof_ptr);
}
我们可以通过打印信息验证这一点
01-02 09:14:39.301 775 28447 E mm-camera-iface2: isp_sof.frame_id=3, f_src=1, mmap_addr=0xf4f17000
01-02 09:14:39.301 775 28447 E mm-camera-iface2: Event SOF session 3 VFE0, src 1 with frame_id 3 ksofid: 0
01-02 09:14:39.301 775 5091 E mm-camera: <MCT ><ERROR> 1026: mct_controller_handle_SOF_proc: Metadata stream not present
01-02 09:14:39.301 775 5091 I mm-camera: <MCT >< INFO> 1040: mct_controller_handle_SOF_proc: (sofdelay, curr_sofdelay) = (-2, -3) kptr 0xf4f17000
这个地址 0xf4f17000 就是 isp_sof_bus_msg->kernel_sof_ptr 的值,而 Fatal signal 11 (SIGSEGV), code 1, fault addr 0xf4f17000 in tid 5091 (CAM_MctBus) 中也是这个地址。
下面我们分析这个地址的来源,根据分析我们可以得到地址来源 ,iface_axi_open iface_thread_start iface_axi_handle_sof_event 三个函数列出如下,我们可以从中得知
isp_sof_bus_msg->kernel_sof_ptr 的创建和传递:
代码路径
:vendor/qcom/proprietary/mm-camera/mm-camera2/media-controller/modules/iface2/axi/iface_axi.c
static int iface_axi_open(iface_axi_hw_t *axi_hw, char *dev_name)
{
int rc = 0;
switch (axi_hw->hw_state) {
case IFACE_AXI_HW_STATE_INVALID: {
axi_hw->fd = open(dev_name, O_RDWR | O_NONBLOCK);
if (axi_hw->fd <= 0) {
CDBG_ERROR("%s: cannot open '%s'\n", __func__, dev_name);
axi_hw->fd = 0;
return -1;
}
/* hw opened, set state to idle */
axi_hw->hw_state = IFACE_AXI_HW_STATE_DEV_OPEN;
}
break;
default: {
/* cannot open twice, nop */
rc = -EAGAIN;
}
break;
}
if (rc == 0) {
/* fork the hw thread to poll on vfe subdev and pipe */
rc = iface_thread_start(&axi_hw->thread_poll, axi_hw, axi_hw->fd);
}
if(rc == 0) {
/* fork the hw thread for stream on/off */
rc = iface_sem_thread_start(&axi_hw->thread_stream, axi_hw);
}
return rc;
}
代码路径
:vendor/qcom/proprietary/mm-camera/mm-camera2/media-controller/modules/iface2/axi/iface_axi_thread.c
int iface_thread_start(iface_thread_t *thread_data, void *hw_ptr, int poll_fd)
{
axi_hw->mmap_addr =
mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
MAP_SHARED, poll_fd, 0);
}
以及
代码路径
:vendor/qcom/proprietary/mm-camera/mm-camera2/media-controller/modules/iface2/axi/iface_axi.c
/** iface_axi_handle_sof_event:
*
* Description:
* this funciton is called when SOF event is received from
* kernel
*
* @isp_hw: isp hw
* @isp_event_data: isp event data
*
* Returns 0 for success and negative error on failure
**/
static int iface_axi_handle_sof_event(
iface_axi_hw_t *axi_hw,
struct msm_isp_event_data *sof,
enum msm_vfe_input_src input_src)
{
if (axi_hw->mmap_addr != MAP_FAILED) {
isp_sof.kernel_sof_ptr = axi_hw->mmap_addr;
ksofid = *(uint32_t *)axi_hw->mmap_addr;
} else {
isp_sof.kernel_sof_ptr = NULL;
}
}
这里的 axi_hw->fd =open(“/dev/v4l-subdev13”)对应着的设备路径如下
==kernel/msm-3.18/drivers/media/platform/msm/camera_v2/isp/msm_isp.c==
大致我们可以分析状况如下:
vendor 还在处理数据,但是mmap的地址已经被释放了,现在我们来验证这一想法。
下面我们摘录出log的异常地方便于分析
01-02 09:14:39.463 775 28462 E mm-camera-iface2: Event SOF session 3 VFE0, src 1 with frame_id 2 ksofid: 0
01-02 09:14:39.463 775 5091 E mm-camera: <MCT ><ERROR> 1026: mct_controller_handle_SOF_proc: Metadata stream not present
01-02 09:14:39.464 775 5091 I mm-camera: <MCT >< INFO> 1040: mct_controller_handle_SOF_proc: (sofdelay, curr_sofdelay) = (-1, -2) kptr 0xf4f17000
01-02 09:14:39.464 775 5091 E mm-camera-iface2: iface_post_control_sof_to_thread: E, session id = 3, user stream id = 15
01-02 09:14:39.471 775 5089 I mm-camera: <MCT >< INFO> 3674: mct_pipeline_process_set: command=8000009
01-02 09:14:39.471 775 5089 I mm-camera: <MCT >< INFO> 3812: mct_pipeline_process_set: STREAM-OFF on stream 0x30001 stream type=8
01-02 09:14:39.479 775 5089 E mm-camera: <ISP ><ERROR> 1006: isp_handler_control_streamoff: stream_off 2 2 ide 30001
01-02 09:14:39.480 775 5089 E mm-camera: <ISP ><ERROR> 1729: isp_util_forward_event_to_internal_pipeline: isp pipeline not support this stream identity 30001
01-02 09:14:39.480 775 5089 E mm-camera: <ISP ><ERROR> 1729: isp_util_forward_event_to_internal_pipeline: isp pipeline not support this stream identity 30001
01-02 09:14:39.481 775 5089 I mm-camera: <IFACE >< INFO> 1622: iface_streamoff_to_thread: iface_streamoff_to_thread: E, session id = 3, user stream id = 1
01-02 09:14:39.481 775 5089 E mm-camera-iface2: iface_session_sem_thread_execute: E
01-02 09:14:39.481 775 5072 E mm-camera-iface2: iface:streamoff
01-02 09:14:39.481 775 5072 I mm-camera: <IFACE >< INFO> 2216: iface_streamoff: E, session_id 3 mct_stream_id = 1
01-02 09:14:39.481 775 5072 E mm-camera-iface2: iface_decide_session_hw_stream: no Session based hw streams to streamon, *num_hw_streams = 0
01-02 09:14:39.482 775 28463 E mm-camera-iface2: iface_axi_cfg_stream: E, start_flag = 0, sessionid = 3
01-02 09:14:39.482 775 28463 E mm-camera-iface2: iface_axi_cfg_stream:1617: Cmd (0) axi stream 0x205 on vfe 0
01-02 09:14:39.483 775 28462 E mm-camera-iface2: isp_sof.frame_id=3, f_src=1, mmap_addr=0xf4f17000
01-02 09:14:39.483 775 28463 E mm-camera-iface2: iface_axi_unreg_buf: Revmoe bufq in kernel with hw stream 0x1, use stream 1
01-02 09:14:39.483 775 28462 E mm-camera-iface2: Event SOF session 3 VFE0, src 1 with frame_id 3 ksofid: 0
01-02 09:14:39.484 775 28463 E mm-camera-iface2: iface_axi_unreg_buf: Unregister buf session_id = 3, stream_id = 1, buf_handle = 0x340a0000
01-02 09:14:39.484 775 28463 E mm-camera-iface2: iface_axi_cfg_stats_stream_stop: stats_cmd: enable = 0, num of stats stream = 0
01-02 09:14:39.484 775 28463 E mm-camera-iface2: iface_axi_cfg_stats_stream_stop: no stats stream to START/STOP, stats cmd.num_streams = 0
01-02 09:14:39.486 775 5072 E mm-camera-iface2: iface_streamoff: hw_stream id 1 mct_stream_id 1 user_stream_id 1
01-02 09:14:39.486 775 5072 I mm-camera: <IFACE >< INFO> 2406: iface_streamoff: session_id = 3, active_streams = 0
01-02 09:14:39.486 775 5072 I mm-camera: <IFACE >< INFO> 2425: iface_streamoff: session id = 0x3,Delta between vfe irqs 0 sec 0 usec
01-02 09:14:39.486 775 5072 E mm-camera-iface2: iface_streamoff: hw_stream 0xf106777c type 0 axi_path 5 id 1
01-02 09:14:39.486 775 5072 I mm-camera: <IFACE >< INFO> 10046: iface_util_release_resource: camif_cnt= 0, rdi_cnt= 0, used mask 2
01-02 09:14:39.486 775 5072 E mm-camera-iface2: iface_destroy_hw: E, hw_idx 0
01-02 09:14:39.487 775 5072 E mm-camera-iface2: iface_destroy_hw: Destroy axi hw on VFE :0
--------- beginning of crash
01-02 09:14:39.508 775 5091 F libc : Fatal signal 11 (SIGSEGV), code 1, fault addr 0xf4f17000 in tid 5091 (CAM_MctBus)
由log可以看出 ==iface_axi_handle_sof_event== 这个事件还在处理中,根据函数注释我们可以得知
this funciton is called when SOF event is received from kernel
01-02 09:14:39.483 775 28462 E mm-camera-iface2: Event SOF session 3 VFE0, src 1 with frame_id 3 ksofid: 0
但是在此之前,根据时间戳发现却已经执行了 streamoff
01-02 09:14:39.481 775 5072 E mm-camera-iface2: iface:streamoff
现在我们把整个函数整理如下
代码路径
:vendor/qcom/proprietary/mm-camera/mm-camera2/media-controller/modules/iface2/axi/iface_axi_thread.c
/** iface_thread_main_loop
*
* DESCRIPTION: main loop to proc cmd or poll v4l2 event
*
**/
static void *iface_thread_main_loop(void *data)
{
int rc = 0, i;
int timeout;
int thread_exit = 0;
iface_thread_t *thread_data = (iface_thread_t *)data;
iface_axi_hw_t *axi_hw = (iface_axi_hw_t *)thread_data->hw_ptr;
timeout = thread_data->poll_timeoutms;
/* wake up the creater first */
sem_post(&thread_data->sig_sem);
while(!thread_exit) {
for(i = 0; i < thread_data->num_fds; i++)
thread_data->poll_fds[i].events = POLLIN|POLLRDNORM|POLLPRI;
rc = poll(thread_data->poll_fds, thread_data->num_fds, timeout);
if(rc > 0) {
if ((thread_data->poll_fds[0].revents & POLLIN) &&
(thread_data->poll_fds[0].revents & POLLRDNORM)) {
/* if we have data on pipe, we only process pipe in this iteration */
rc = iface_thread_proc_cmd(axi_hw, thread_data, &thread_exit, TRUE);
} else {
if ((thread_data->poll_fds[1].revents & POLLPRI) ||
(thread_data->poll_fds[1].revents & POLLIN) ||
(thread_data->poll_fds[1].revents & POLLRDNORM)) {
/* if we have data on subdev */
iface_axi_proc_subdev_event(axi_hw, thread_data);
}
}
} else {
CDBG_ERROR("error: poll() failed error =%s %d",
strerror(errno), errno);
}
}
if (thread_data->pipe_fds[0] > 0) {
close(thread_data->pipe_fds[0]);
thread_data->pipe_fds[0] = 0;
}
if (thread_data->pipe_fds[1] > 0) {
close(thread_data->pipe_fds[1]);
thread_data->pipe_fds[1] = 0;
}
return NULL;
}
iface_axi_proc_subdev_event—->iface_axi_handle_sof_event(axi_hw, sof, input_src);
—->ISP_EVENT_SOF—->iface_axi_handle_sof_event
而 ISP_EVENT_SOF 的事件来源是 msm_isp40.c ,代表着 kernel 接收到 sensor 的帧。
代码路径
:kernel/msm-3.18/drivers/media/platform/msm/camera_v2/isp/msm_isp40.c
case VFE_RAW_0:
case VFE_RAW_1:
case VFE_RAW_2:
msm_isp_notify(vfe_dev, ISP_EVENT_SOF, i, ts);
或者
msm_isp_notify(vfe_dev, ISP_EVENT_SOF, VFE_PIX_0, ts);
这里可以验证我们的猜想,kernel的事件确实还通过v4l2上传到vendor去处理。
现在我们回到kernel的log
[ 2801.881016] msm_isp_open_node open_cnt 0
[ 2801.886397] msm_isp_open_node: HW Version: 0x10090000
[ 2801.892568] msm_isp_open_node open_cnt 1 exit 111111
[ 2801.963456] msm_isp_close_node E open_cnt 1
[ 2801.966810] msm_isp_close_node open_cnt 1 unregister page fault handler
[ 2801.976748] msm_isp_close_node X open_cnt 0 EXIT 1111111
[ 2804.384904] pgd = ffffffc059f28000
[ 2804.389559] [f4f17000] *pgd=00000000d9dda003, *pud=00000000d9dda003
也可以佐证我们的想法,已经执行完 msm_isp_close_node 函数才出现 pgd = ffffffc059f28000 这个取地址的错误。
我们可以打印如下函数中的 munmap(axi_hw->mmap_addr, PAGE_SIZE) 来验证
代码路径
:kernel/msm-3.18/drivers/media/platform/msm/camera_v2/isp/msm_isp40.c
/** iface_axi_destroy_thread
*
* @thread: information about the thread to be destroyed
*
* Destroys isp hw thread
**/
static int iface_axi_destroy_thread(iface_thread_t *thread)
{
int len;
uint32_t cmd = IFACE_THREAD_CMD_DESTROY;
iface_axi_hw_t *axi_hw;
axi_hw = (iface_axi_hw_t *)thread->hw_ptr;
pthread_mutex_lock(&thread->cmd_mutex);
if (axi_hw->mmap_addr != MAP_FAILED) {
if (munmap(axi_hw->mmap_addr, PAGE_SIZE))
perror("munmap");
}
if (thread->return_code == -EPIPE) {
CDBG_ERROR("%s: Pipe read error\n", __func__);
pthread_mutex_unlock(&thread->cmd_mutex);
return -EPIPE;
}
len = write(thread->pipe_fds[1], &cmd, sizeof(cmd));
if (len != sizeof(cmd)) {
/* we got error here */
pthread_mutex_unlock(&thread->cmd_mutex);
CDBG_ERROR("%s: Pipe write error\n", __func__);
return -EPIPE;
}
/* we do not wait on sem but join to wait the thread to die. */
if (pthread_join(thread->pid, NULL) != 0)
CDBG("%s: pthread dead already\n", __func__);
sem_destroy(&thread->sig_sem);
pthread_mutex_destroy(&thread->busy_mutex);
pthread_mutex_unlock(&thread->cmd_mutex);
pthread_mutex_destroy(&thread->cmd_mutex);
return 0;
}