来源:微信公众号 WeMobileDev 于2021年7月19日发布的《微信Android客户端的ANR监控方案》。
该方案的所有代码已经在Matrix(https://github.com/Tencent/matrix)中开源,这篇文章将详细讲解源码实现。
当应用发生ANR之后,系统会收集许多进程来dump堆栈,从而生成ANR Trace文件。收集的第一个、也是一定会被收集到的进程,就是发生ANR的进程。接着系统开始向这些应用进程发送SIGQUIT信号,应用进程收到SIGQUIT后开始dump堆栈。来简单画个示意图:
1. SignalAnrTracer 的 onAlive 方法里调用 nativeInitSignalAnrDetective 方法,监听 SIGQUIT 信号。
public class SignalAnrTracer extends Tracer {
//region Fields
// Log tag used for every MatrixLog call in this class.
private static final String TAG = "SignalAnrTracer";
// Name of the thread started by onANRDumped() to poll the process error state.
// After a SIGQUIT is observed, the process is polled for up to 20 seconds (the ANR dump
// timeout) for the NOT_RESPONDING flag; as soon as that flag is seen, an ANR is confirmed.
private static final String CHECK_ANR_STATE_THREAD_NAME = "Check-ANR-State-Thread";
// Interval between two NOT_RESPONDING checks, in the same unit as ANR_DUMP_MAX_TIME (ms).
private static final int CHECK_ERROR_STATE_INTERVAL = 500;
// Maximum duration of an ANR dump: 20 s, expressed in milliseconds.
private static final int ANR_DUMP_MAX_TIME = 20000;
// Maximum number of error-state checks: dump window divided by the check interval (40).
private static final int CHECK_ERROR_STATE_COUNT =
ANR_DUMP_MAX_TIME / CHECK_ERROR_STATE_INTERVAL;
// Foreground threshold: the head main-thread message counts as stuck once it is overdue by
// more than 2 s. Negative because it is compared against (message.when - uptimeMillis).
private static final long FOREGROUND_MSG_THRESHOLD = -2000;
// Background threshold: the head main-thread message counts as stuck once it is overdue by
// more than 10 s. Negative for the same reason as the foreground threshold.
private static final long BACKGROUND_MSG_THRESHOLD = -10000;
// Set to true by every constructor of this class.
public static boolean hasInstance = false;
// Foreground state, assigned in onANRDumped() from AppForegroundUtil.isInterestingToUser().
private static boolean currentForeground = false;
// Path of the ANR trace file; passed to MatrixUtil.printFileByLine in onANRDumpTrace().
private static String sAnrTraceFilePath = "";
// The trace hook is not limited to ANR investigation: the process can send itself a SIGQUIT
// at any time and hook the resulting trace, which helps when analysing thread deadlocks,
// thread anomalies, battery drain and similar problems.
// Path of the trace file for such self-triggered dumps; used by onPrintTrace().
private static String sPrintTraceFilePath = "";
// Optional listener; when non-null, report() notifies it instead of raising a plugin issue.
private static SignalAnrDetectedListener sSignalAnrDetectedListener;
// Application instance supplied through the constructors that accept one.
private static Application sApplication;
// Initialization flag. NOTE(review): not read or written in the visible part of this class
// beyond this default; confirm its usage further down the file.
private static boolean hasInit = false;
// (message.when - uptimeMillis) of the head main-thread message, recorded by
// isMainThreadBlocked(); negative when the message is overdue.
private static long anrMessageWhen = 0L;
// toString() of the head main-thread message, recorded by isMainThreadBlocked().
private static String anrMessageString = "";
//endregion
static {
// Load the native "trace-canary" library when the class is initialized.
System.loadLibrary("trace-canary");
}
//region 构造函数
/**
 * Creates a tracer whose trace-file locations are taken from the given configuration.
 *
 * <p>Marks the class as instantiated and stores both paths in static state; the
 * application reference is left untouched by this constructor.
 *
 * @param traceConfig configuration supplying {@code anrTraceFilePath} and
 *                    {@code printTraceFilePath}
 */
public SignalAnrTracer(TraceConfig traceConfig) {
    SignalAnrTracer.hasInstance = true;
    SignalAnrTracer.sAnrTraceFilePath = traceConfig.anrTraceFilePath;
    SignalAnrTracer.sPrintTraceFilePath = traceConfig.printTraceFilePath;
}
/**
 * Creates a tracer bound to the given application.
 *
 * <p>Marks the class as instantiated and stores the application in static state; the
 * trace-file paths keep whatever value they already had.
 *
 * @param application the application instance to remember
 */
public SignalAnrTracer(Application application) {
    SignalAnrTracer.hasInstance = true;
    SignalAnrTracer.sApplication = application;
}
/**
 * Creates a tracer bound to the given application with explicit trace-file locations.
 *
 * <p>Marks the class as instantiated and stores all three arguments in static state.
 *
 * @param application        the application instance to remember
 * @param anrTraceFilePath   path of the ANR trace file
 * @param printTraceFilePath path of the trace file used for self-triggered dumps
 */
public SignalAnrTracer(Application application, String anrTraceFilePath, String printTraceFilePath) {
    SignalAnrTracer.hasInstance = true;
    SignalAnrTracer.sAnrTraceFilePath = anrTraceFilePath;
    SignalAnrTracer.sPrintTraceFilePath = printTraceFilePath;
    SignalAnrTracer.sApplication = application;
}
//endregion
/**
 * Entry point for an observed ANR dump; the original note points at {@code handleSignal}
 * in {@code AnrDumper.cc} as the caller.
 *
 * <p>Records the current foreground state, then takes one of two paths:
 * <ul>
 *   <li>if the main thread's head message is already overdue beyond the threshold, the ANR
 *       is reported immediately with {@code fromProcessErrorState = false};</li>
 *   <li>otherwise a thread named {@link #CHECK_ANR_STATE_THREAD_NAME} is started to run
 *       {@code checkErrorStateCycle()}, which polls for the NOT_RESPONDING flag during
 *       the ANR dump window.</li>
 * </ul>
 */
@RequiresApi(api = Build.VERSION_CODES.M)
@Keep
private static void onANRDumped() {
    // Capture the foreground state first: the blocked-check picks its threshold from it.
    currentForeground = AppForegroundUtil.isInterestingToUser();

    if (isMainThreadBlocked()) {
        // The main-thread queue is already stalled, so no polling is needed.
        report(false);
        return;
    }

    // Not conclusively blocked yet: poll the process error state on a dedicated thread.
    final Runnable errorStatePoller = new Runnable() {
        @Override
        public void run() {
            checkErrorStateCycle();
        }
    };
    final Thread pollThread = new Thread(errorStatePoller, CHECK_ANR_STATE_THREAD_NAME);
    pollThread.start();
}
/**
 * Hands the ANR trace file path to {@code MatrixUtil.printFileByLine} under this class's
 * log tag. Any {@link Throwable} raised while doing so is caught and only its message is
 * logged, so this callback never propagates a failure.
 *
 * <p>NOTE(review): kept via {@code @Keep}; presumably invoked from native code - confirm.
 */
@Keep
private static void onANRDumpTrace() {
    final String tracePath = sAnrTraceFilePath;
    try {
        MatrixUtil.printFileByLine(TAG, tracePath);
    } catch (Throwable failure) {
        final String reason = failure.getMessage();
        MatrixLog.e(TAG, "onANRDumpTrace error: %s", reason);
    }
}
//endregion
/**
 * Hands the self-triggered trace file path to {@code MatrixUtil.printFileByLine} under
 * this class's log tag. Any {@link Throwable} raised while doing so is caught and only its
 * message is logged, so this callback never propagates a failure.
 *
 * <p>NOTE(review): kept via {@code @Keep}; presumably invoked from native code - confirm.
 */
@Keep
private static void onPrintTrace() {
    final String tracePath = sPrintTraceFilePath;
    try {
        MatrixUtil.printFileByLine(TAG, tracePath);
    } catch (Throwable failure) {
        final String reason = failure.getMessage();
        MatrixLog.e(TAG, "onPrintTrace error: %s", reason);
    }
}
/**
 * Publishes a detected ANR.
 *
 * <p>The main thread's Java stack trace is captured first. When a
 * {@code SignalAnrDetectedListener} is registered, it receives the stack together with the
 * recorded head-message description and timing, and nothing else is done. Otherwise an
 * {@code Issue} of stack type {@code SIGNAL_ANR} is assembled (device info, visible scene,
 * stack, foreground flag) and handed to the {@code TracePlugin}; if no such plugin is
 * installed the call is a no-op.
 *
 * @param fromProcessErrorState {@code true} when the ANR was confirmed by the error-state
 *                              polling loop, {@code false} when the main thread was judged
 *                              blocked directly
 */
private static void report(boolean fromProcessErrorState) {
    try {
        final String mainThreadStack = Utils.getMainThreadJavaStackTrace();

        // A registered listener takes over reporting entirely.
        if (sSignalAnrDetectedListener != null) {
            sSignalAnrDetectedListener.onAnrDetected(
                    mainThreadStack, anrMessageString, anrMessageWhen, fromProcessErrorState);
            return;
        }

        final TracePlugin tracePlugin = Matrix.with().getPluginByClass(TracePlugin.class);
        if (tracePlugin == null) {
            return;
        }

        final String visibleScene = AppMethodBeat.getVisibleScene();
        final JSONObject content =
                DeviceUtil.getDeviceInfo(new JSONObject(), Matrix.with().getApplication());
        content.put(SharePluginInfo.ISSUE_STACK_TYPE, Constants.Type.SIGNAL_ANR);
        content.put(SharePluginInfo.ISSUE_SCENE, visibleScene);
        content.put(SharePluginInfo.ISSUE_THREAD_STACK, mainThreadStack);
        content.put(SharePluginInfo.ISSUE_PROCESS_FOREGROUND, currentForeground);

        final Issue anrIssue = new Issue();
        anrIssue.setTag(SharePluginInfo.TAG_PLUGIN_EVIL_METHOD);
        anrIssue.setContent(content);
        tracePlugin.onDetectIssue(anrIssue);

        MatrixLog.e(TAG, "happens real ANR : %s ", content.toString());
    } catch (JSONException jsonError) {
        MatrixLog.e(TAG, "[JSONException error: %s", jsonError);
    }
}
/**
 * Decides whether the main thread looks blocked by inspecting the head of its message queue.
 *
 * <p>The head message is read reflectively from the {@code mMessages} field of the main
 * looper's queue. Its {@code toString()} is stored in {@code anrMessageString}, and the
 * difference {@code when - SystemClock.uptimeMillis()} (negative when the message is
 * overdue) is stored in {@code anrMessageWhen}. The main thread is considered blocked when
 * that difference is below {@code FOREGROUND_MSG_THRESHOLD} (foreground) or
 * {@code BACKGROUND_MSG_THRESHOLD} (background).
 *
 * <p>Both recorded fields are reset on entry so that a later {@code report(...)} never
 * pairs this signal with message details left over from an earlier one (for example when
 * the queue is empty or the head message has {@code when == 0}).
 *
 * @return {@code true} if the head message is overdue beyond the applicable threshold;
 *         {@code false} if the queue is empty, the head message has {@code when == 0},
 *         the message is not overdue enough, or the queue could not be inspected
 */
@RequiresApi(api = Build.VERSION_CODES.M)
private static boolean isMainThreadBlocked() {
    // Drop whatever a previous invocation recorded; see the method documentation.
    anrMessageString = "";
    anrMessageWhen = 0L;
    try {
        MessageQueue mainQueue = Looper.getMainLooper().getQueue();
        Field field = mainQueue.getClass().getDeclaredField("mMessages");
        field.setAccessible(true);
        final Message headMessage = (Message) field.get(mainQueue);
        if (headMessage == null) {
            // Nothing pending on the main thread.
            return false;
        }
        anrMessageString = headMessage.toString();
        long when = headMessage.getWhen();
        if (when == 0) {
            // No delivery time to compare against.
            return false;
        }
        // Negative value == the message should already have been dispatched.
        long time = when - SystemClock.uptimeMillis();
        anrMessageWhen = time;
        long timeThreshold = currentForeground ? FOREGROUND_MSG_THRESHOLD : BACKGROUND_MSG_THRESHOLD;
        return time < timeThreshold;
    } catch (Exception e) {
        // Reflection on the queue can fail; leave a trace instead of failing silently, and
        // let the caller fall back to polling the process error state.
        MatrixLog.e(TAG, "isMainThreadBlocked error: %s", e);
        return false;
    }
}
private static void checkErrorStateCycle() {
int checkErrorStateCount = 0;
//开启一个循环检测
while (checkErrorStateCount < CHECK_ERROR_STATE_COUNT) {
try {
checkErrorStateCount++;
boolean myAnr = checkErrorState();
if (myAnr) {
report(true);
break;
}
Thread