netd crash：当 Wifi 和 3G 进行切换的时候，netd 会 crash，概率约 10%。出现这样的问题，关键是怎么定位问题；solution 一般比较简单。
platform: qualcomm 8225
pid: 156, tid: 362, name: netd >>> /system/bin/netd <<<
thread: netd
signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 00000000
r0 40e96388 r1 00000072 r2 41c4bb4c r3 00000000
r4 41c4bb28 r5 41c596b0 r6 00000000 r7 41c49808
r8 41c4bb28 r9 00000000 sl 40e96554 fp 40e96992
ip 00000030 sp 40e96368 lr 4007bc91 pc 4007dcc0 cpsr 80000030
d0 6e49746573205d72 d1 516563616672656d
d2 656b636f6c5f686e d3 204e524157203a65
d4 0000000000000000 d5 0000000000000000
d6 0000000000000000 d7 00305dd600000000
d8 0000000000000000 d9 0000000000000000
d10 0000000000000000 d11 0000000000000000
d12 0000000000000000 d13 0000000000000000
d14 0000000000000000 d15 0000000000000000
d16 41482eeb0020c49c d17 3f50624dd2f1a9fc
d18 41a43c5be2000000 d19 0000000000000000
d20 0000000000000000 d21 0000000000000000
d22 0000000000000000 d23 0000000000000000
d24 0000000000000000 d25 0000000000000000
d26 0000000000000000 d27 0000000000000000
d28 0000000000000000 d29 0000000000000000
d30 0000000000000000 d31 0000000000000000
scr 00000010
backtrace:
#00 pc 00005cc0 /system/bin/netd
#01 pc 00003c8d /system/bin/netd (std::string::_M_assign(char const*, char const*)+74)
stack:
40e96328 00000000
40e9632c 00000000
40e96330 40e96388 [stack:362]
40e96334 40e96414 [stack:362]
40e96338 40e96388 [stack:362]
40e9633c 00000006
40e96340 40e96414 [stack:362]
40e96344 40e9641a [stack:362]
40e96348 41c4bb28 [heap]
40e9634c 4007bc91 /system/bin/netd (std::string::_M_assign(char const*, char const*)+78)
40e96350 40e96454 [stack:362]
40e96354 41c4bb28 [heap]
40e96358 41c596b0 [heap]
40e9635c 00000000
40e96360 df0027ad
40e96364 00000000
#00 40e96368 4033a004 /system/lib/libnetutils.so
........ ........
#01 40e96368 4033a004 /system/lib/libnetutils.so
40e9636c 401efacc
40e96370 ffffffff
40e96374 7fffffff
40e96378 00000000
40e9637c 401efacc
40e96380 00000000
40e96384 00000000
40e96388 656e6d72
40e9638c 00003074
40e96390 59cf220a
40e96394 00000000
40e96398 40e9638e [stack:362]
40e9639c 40e96388 [stack:362]
40e963a0 00001000
40e963a4 41c58448 [heap]
从backtrace上看，netd crash在 #00 pc 00005cc0 /system/bin/netd 这个点上。找到相对应的netd（请注意，这个netd不是system/bin/netd，因为该文件不包含debug信息，而应该是system/symbol下面带debug信息的netd，各个platform的路径可能不太一样），利用gdb进行解析，
输入 b * 0x00005cc0
也可以通过 arm-eabi-objdump -D -S netd 把汇编导出来，确认一下问题。
找到BandwidthController.cpp中的675行
这是函数 int BandwidthController::setInterfaceQuota(const char *iface, int64_t maxBytes) 中的 it->ifaceName == ifaceName
ifaceName是传进来的一个参数，我们确认它是正确的，在这里也没有被修改；而quotaIfaces是一个全局变量，这才是我们要怀疑的真正的点，它在多线程中可能被修改。这里实际上涉及到了多线程保护的问题。接下来是问题的确认：netd模块处理的一些命令都是从NetDemondControl传下来的，基于这一点，我们可以在这个模块中加入test command，然后在BandwidthController中进行处理，这样就可以缩小问题的范围。
//this test function.
int BandwidthController::BandwidthControllerTest()
{
ALOGV("func=%s,quotaAddr=0x%p", __func__,"aIfaces);
if(quotaIfaces.size()<=0)
{
ALOGV("func=%s,size=%d", __func__,NULL);
}else
{
ALOGV("func=%s,size=%d", __func__,quotaIfaces.size());
}
return 0;
}
最后发现问题出在下面这个函数中：
int SoftapController::startDriver(char *iface) {
int ret;
ALOGE("%s\n",__func__);
if (mSock < 0) {
ALOGE("Softap driver start - failed to open socket");
return -1;
}
if (!iface || (iface[0] == '\0')) {
ALOGD("Softap driver start - wrong interface");
iface = mIface;
}
*mBuf = 0;
ret = setCommand(iface, "START");
if (ret < 0) {
ALOGE("Softap driver start: %d", ret);
return ret;
}
#ifdef ATH_STA_AP_CONCURRENT
if(isConcurrentMode()) {
char cmd[32]="p2p_group_remove p2p0";
char buf[32];
unsigned int len ;
ret = connect2Supplicant(ATH_P2P_IFACE);
if (ret < 0) {
ALOGE("Softap connect to p2p failed: %d", ret);
return ret;
}
#if 1
memset(buf,0,32);
len = sizeof(buf);
ret = sendSupplicantCommand(ATH_P2P_IFACE, cmd, buf, &len);
if (ret < 0) {
ALOGE("Softap send p2p cmd \"%s\" failed: %d",cmd, ret);
return ret;
}
closeSupplicantConnection(ATH_P2P_IFACE);
把这部分code去掉以后，问题就消失了。到这里又开始迷茫了：下层的这些东西都是common的，问题应该还是出在netd模块，但问题到底在哪里呢？SoftapController这个controller是为Atheros新加的，不是google原始的代码。
最后发现是CommandListener.h中的头文件包含出现问题引起的，这是wifi engineer在porting wifi的时候出现失误导致的问题。
//#include "SoftapController.h"
#include "SoftapControllerAtheros.h"
#endif
ifc_init();
ret = ifc_down("p2p0");
ifc_close();
if (ret < 0) {
ALOGE("Softap concurrent p2p %s down: %d", iface, ret);
return ret;
}
strncpy(nl_cmd_cfg.iface, "wlan0", 32);
if ((ret = nl80211_init(&nl_cmd_cfg))) {
ALOGE("couldn't init nl80211!: %s\n", strerror(errno));
return ret;
}
if ((ret = nl80211_sap_if_add(&nl_cmd_cfg, ATH_CONCURRENT_SAP_IFNAME))) {
ALOGE("couldn't add sap if: %s\n", strerror(errno));
nl80211_deinit(&nl_cmd_cfg);
return ret;
}
nl80211_deinit(&nl_cmd_cfg);
}
#endif
#ifdef HAVE_HOSTAPD
ifc_init();
if(isConcurrentMode())
ret = ifc_up(ATH_CONCURRENT_SAP_IFNAME);
else
ret = ifc_up(iface);
ifc_close();
#endif
usleep(AP_DRIVER_START_DELAY);
ALOGD("Softap driver start: %d", ret);
return ret;
}
下面是Tombstone的全部内容