【Android 内存泄漏】KOOM 怎么实现线程泄漏的监控?

前言

前面分析了KOOM Java leak、native leak的监控思路,这篇继续分析它线程监控的整体思路。

使用

来看下使用方法,依旧是很简洁的三行代码即可开启监控:

 // Build the monitor config and register the leak listener (initMonitor is shown below).
 initMonitor()
 // Queue startTrack() at the front of the monitor's loop handler.
 ThreadMonitor.startTrackAsync()
 // Presumably stops the monitor; stop() itself is not shown in this article.
  ThreadMonitor.stop()

initMonitor

先看看怎么初始化监控器的:

 /**
  * Wires a [ThreadLeakListener] into [ThreadMonitor].
  *
  * Leak reports and errors are logged through [MonitorLog] and mirrored into the
  * `mLeakText` / `mErrorText` views. If the monitor is already initialized only the
  * listener is replaced; otherwise a config carrying the listener is registered with
  * [MonitorManager].
  */
 private fun initMonitor() {
   val leakListener = object : ThreadLeakListener {
     override fun onReport(leaks: MutableList<ThreadLeakRecord>) {
       for (record in leaks) {
         MonitorLog.i(LOG_TAG, record.toString())
       }
       // The text is built inside the posted task, not at report time.
       mLeakText.post {
         mLeakText.text = "leak threads: ${leaks.map { it.name }}"
       }
     }

     override fun onError(msg: String) {
       MonitorLog.e(LOG_TAG, msg)
       mErrorText.post { mErrorText.text = msg }
     }
   }

   if (ThreadMonitor.isInitialized) {
     ThreadMonitor.setListener(leakListener)
     return
   }

   val config = ThreadMonitorConfig.Builder()
       .enableThreadLeakCheck(2 * 1000L, 5 * 1000L)
       .setListener(leakListener)
       .build()
   MonitorManager.addMonitorConfig(config)
 }

依旧是构建者模式设置一些参数,然后通过接口回调的方式开启监听。

看下ThreadLeakRecord长什么样子:

/**
 * One thread reported by the thread-leak monitor.
 *
 * The properties are plain values filled in by the reporter; their units are not
 * visible in this snippet.
 *
 * @property tid id of the reported thread
 * @property createTime time value recorded for the thread's creation
 * @property startTime time value recorded for the thread's start
 * @property endTime time value recorded for the thread's end
 * @property name thread name
 * @property createCallStack call stack text captured when the thread was created
 */
@Keep
data class ThreadLeakRecord(
    val tid: Int,
    val createTime: Long,
    val startTime: Long,
    val endTime: Long,
    val name: String,
    val createCallStack: String) {

  /**
   * Multi-line dump: one "field: value" line per property, with the call stack
   * appended last on its own lines.
   */
  override fun toString(): String = StringBuilder().apply {
    append("tid: $tid\n")
    // createTime is a time value like startTime/endTime, not a size, so it is printed
    // without the misleading " Byte" unit suffix.
    append("createTime: $createTime\n")
    append("startTime: $startTime\n")
    append("endTime: $endTime\n")
    append("name: $name\n")
    append("createCallStack:\n")
    append(createCallStack)
  }.toString()
}

/**
 * Groups a list of [ThreadLeakRecord]s under a type label.
 *
 * NOTE(review): the meaning of [type] is not visible in this snippet — confirm with
 * the code that builds the container.
 */
@Keep
data class ThreadLeakContainer(
    val type: String,
    val threads: MutableList<ThreadLeakRecord>)

ThreadMonitor.startTrackAsync()

  /**
   * Asynchronous entry point: schedules [startTrack] on the loop handler, placing the
   * task at the front of the handler's queue.
   */
  fun startTrackAsync() {
    val loopHandler = getLoopHandler()
    loopHandler.postAtFrontOfQueue { startTrack() }
  }

这里handler执行的是postAtFrontOfQueue,这个方法会把当前任务post到消息队列的队列头里面,优先执行。

  /**
   * Runs the native initialization and, only when it succeeds, marks the monitor as
   * running and starts the loop after the configured start delay.
   */
  fun startTrack() {
    if (!handleNativeInit()) return

    mIsRunning = true
    startLoop(clearQueue = true, postAtFront = false, delayMillis = monitorConfig.startDelay)
  }
/**
 * Checks the platform, loads the native library and pushes the configuration down to
 * [NativeHandler] before starting it.
 *
 * Every failure is reported through the configured listener's `onError`.
 *
 * @return `true` once [NativeHandler] has been started; `false` when the SDK level or
 *   CPU architecture is rejected or the library cannot be loaded.
 */
private fun handleNativeInit(): Boolean {
  val sdk = Build.VERSION.SDK_INT
  // NOTE(review): `<= O` rejects Android O itself and lets API 27 through, while the
  // message below says "P below" — confirm which lower bound is intended.
  if (sdk <= Build.VERSION_CODES.O || sdk > Build.VERSION_CODES.R) {
    monitorConfig.listener?.onError("not support P below or R above now!")
    return false
  }

  if (!isArm64()) {
    monitorConfig.listener?.onError("support arm64 only!")
    return false
  }

  val loaded = loadSoQuietly("koom-thread")
  if (!loaded) {
    monitorConfig.listener?.onError("loadLibrary fail")
    return false
  }
  MonitorLog.i(TAG, "loadLibrary success")

  // Forward the optional switches before the native side is started.
  if (monitorConfig.disableNativeStack) NativeHandler.disableNativeStack()
  if (monitorConfig.disableJavaStack) NativeHandler.disableJavaStack()
  if (monitorConfig.enableNativeLog) NativeHandler.enableNativeLog()

  NativeHandler.setThreadLeakDelay(monitorConfig.threadLeakDelay)
  NativeHandler.start()
  MonitorLog.i(TAG, "init finish")
  return true
}

从判断条件可以看出,线程监控当前只支持 SDK 版本 27-30(`SDK_INT <= O` 时直接返回,所以 API 26 也不支持),同时只支持arm64架构。

后面就是设置一些参数

// Definitions of CallStack's static switches. No initializer is given, so as objects
// with static storage duration they start out zero-initialized (false).
std::atomic<bool> CallStack::disableJava;
std::atomic<bool> CallStack::disableNative;

// Sets the disableNative switch to true.
void CallStack::DisableNative() { disableNative.store(true); }

位于cpp/src/common/callstack.cpp里面。
重点需要关注的是NativeHandler.start()方法:

// JNI entry point for NativeHandler.start() (per the JNI-mangled function name):
// logs "start" under the "koom-thread" tag, then calls koom::Start().
// Neither env nor obj is used.
JNIEXPORT void JNICALL
Java_com_kwai_performance_overhead_thread_monitor_NativeHandler_start(
    JNIEnv *env, jclass obj) {
  koom::Log::info("koom-thread", "start");
  koom::Start();
}

先是打印了一些日志,然后start:

Start


// Starts native thread monitoring; does nothing when isRunning is already set.
void Start() {
  if (isRunning) {
    return;
  }
  // Initialize data: drop any previous looper and create a fresh one.
  delete sHookLooper;
  sHookLooper = new HookLooper();
  // ThreadHooker::Start() delegates to InitHook(), which registers the pthread hooks.
  koom::ThreadHooker::Start();
  // The flag is raised only after the looper exists and the hooks are registered.
  isRunning = true;
}

HookLooper


#include "hook_looper.h"

#include "koom.h"
#include "loop_item.h"
namespace koom {
// Log tag used by every message emitted from HookLooper.
const char *looper_tag = "koom-hook-looper";

// Allocates the ThreadHolder that the handled messages are applied to.
HookLooper::HookLooper() : looper() { holder = new koom::ThreadHolder(); }

// Releases the ThreadHolder allocated in the constructor.
HookLooper::~HookLooper() { delete holder; }

// Dispatches one queued message. The base looper sees the message first; then the
// payload is cast to the struct matching `what`, forwarded to the holder and deleted
// here. Unknown message codes fall through without touching the payload.
void HookLooper::handle(int what, void *data) {
  looper::handle(what, data);
  switch (what) {
    case ACTION_ADD_THREAD: {
      koom::Log::info(looper_tag, "AddThread");
      auto *added = static_cast<HookAddInfo *>(data);
      holder->AddThread(added->tid, added->pthread, added->is_thread_detached,
                        added->time, added->create_arg);
      delete added;
      break;
    }
    case ACTION_JOIN_THREAD: {
      koom::Log::info(looper_tag, "JoinThread");
      auto *joined = static_cast<HookInfo *>(data);
      holder->JoinThread(joined->thread_id);
      delete joined;
      break;
    }
    case ACTION_DETACH_THREAD: {
      koom::Log::info(looper_tag, "DetachThread");
      auto *detached = static_cast<HookInfo *>(data);
      holder->DetachThread(detached->thread_id);
      delete detached;
      break;
    }
    case ACTION_EXIT_THREAD: {
      koom::Log::info(looper_tag, "ExitThread");
      auto *exited = static_cast<HookExitInfo *>(data);
      holder->ExitThread(exited->thread_id, exited->threadName, exited->time);
      delete exited;
      break;
    }
    case ACTION_REFRESH: {
      koom::Log::info(looper_tag, "Refresh");
      auto *tick = static_cast<SimpleHookInfo *>(data);
      holder->ReportThreadLeak(tick->time);
      delete tick;
      break;
    }
    default:
      break;
  }
}

// Enqueues a message by forwarding it to the base looper.
void HookLooper::post(int what, void *data) { looper::post(what, data); }
}  // namespace koom

这个类用来响应handler接收到的message,可以推导出,后面会有hook线程的工作,然后在线程的一些生命周期里面进行一些信息的记录和日志的打印。

InitHook


// Hooks the libraries DlopenCb already knows about, then registers DlopenCallback
// with DlopenCb so that libraries reported later go through HookLibs as well.
void ThreadHooker::InitHook() {
  std::set<std::string> loaded_libs;
  koom::Log::info(thread_tag, "HookSo init hook");

  DlopenCb::GetInstance().GetLoadedLibs(loaded_libs);
  HookLibs(loaded_libs, Constant::kDlopenSourceInit);

  DlopenCb::GetInstance().AddCallback(DlopenCallback);
}

关键代码有三行,下面逐一来分析:

GetLoadedLibs

// Copies the entries currently held in hooked_libs into `libs`.
//
// libs:    output set; entries are inserted, anything already in it is kept.
// refresh: when true, Refresh(dlopen_source_get_libs, "") runs before the copy.
void DlopenCb::GetLoadedLibs(std::set<std::string> &libs, bool refresh) {
  if (refresh) {
    std::string empty;
    Refresh(dlopen_source_get_libs, empty);
  }
  pthread_mutex_lock(&add_lib_mutex);
  // The size is read under the same mutex that guards the copy below, and printed
  // with %zu: passing a size_t for %d is a format/argument mismatch.
  XH_LOG_INFO("GetLoadedLibs origin %zu",
              static_cast<size_t>(hooked_libs.size()));
  std::copy(
      hooked_libs.begin(), hooked_libs.end(),
      std::inserter(libs, libs.begin()));
  pthread_mutex_unlock(&add_lib_mutex);
}

这里把一些已经加载的动态库添加到set容器中。

// Entry point of the hooker; simply delegates to InitHook().
void ThreadHooker::Start() { ThreadHooker::InitHook(); }

HookLibs

看下HookLibs:

// Registers the pthread hooks for every library in `libs` and applies them.
//
// libs:   library names to hook; an empty set returns immediately.
// source: caller-supplied code that is passed through to RegisterSo.
//
// The whole clear / register / refresh sequence runs under DlopenCb::hook_mutex.
// xhook_refresh is only called when at least one library was actually registered.
void ThreadHooker::HookLibs(std::set<std::string> &libs, int source) {
  // size() yields a size_t, so it is printed with %zu; %d would be a
  // format/argument mismatch.
  koom::Log::info(thread_tag, "HookSo lib size %zu", libs.size());
  if (libs.empty()) {
    return;
  }
  bool hooked = false;
  pthread_mutex_lock(&DlopenCb::hook_mutex);
  xhook_clear();
  for (const auto &lib : libs) {
    // Every library is attempted; `hooked` records whether any of them registered.
    hooked |= ThreadHooker::RegisterSo(lib, source);
  }
  if (hooked) {
    int result = xhook_refresh(0);
    koom::Log::info(thread_tag, "HookSo lib Refresh result %d", result);
  }
  pthread_mutex_unlock(&DlopenCb::hook_mutex);
}

这里是遍历了set容器里面的所有库文件,然后hook这些lib。

这里面也进行了加锁操作,定义hooked bool变量,假如hook成功其中一个lib,就算hook成功。在后面进行xhook_refresh操作,然后打印日志。

RegisterSo

看起来是个很重要的函数,来分析下:

// Registers the four pthread replacements (create / detach / join / exit) for one
// library with xhook.
//
// Returns false without registering anything when IsLibIgnored(lib) is true, and true
// otherwise. The return codes of xhook_register are not inspected.
bool ThreadHooker::RegisterSo(const std::string &lib, int source) {
  if (IsLibIgnored(lib)) {
    return false;
  }

  const char *lib_path = lib.c_str();
  koom::Log::info(thread_tag, "HookSo %d %s", source, lib_path);

  struct HookEntry {
    const char *symbol;
    void *replacement;
  };
  const HookEntry entries[] = {
      {"pthread_create", reinterpret_cast<void *>(HookThreadCreate)},
      {"pthread_detach", reinterpret_cast<void *>(HookThreadDetach)},
      {"pthread_join", reinterpret_cast<void *>(HookThreadJoin)},
      {"pthread_exit", reinterpret_cast<void *>(HookThreadExit)},
  };
  for (const HookEntry &entry : entries) {
    xhook_register(lib_path, entry.symbol, entry.replacement, nullptr);
  }

  return true;
}

果然,这里hook了这几个函数:

  • pthread_create
  • pthread_detach
  • pthread_join
  • pthread_exit

HookThreadCreate

// Replacement for pthread_create registered by RegisterSo.
//
// While hooking is enabled and a start routine is supplied, the caller's routine and
// argument are wrapped in a StartRtnArg, a Java stack trace (when a current thread
// object is available) and a FastUnwind result are stored in its thread_create_arg,
// and the thread is started through HookThreadStart. Otherwise the call goes straight
// to pthread_create with the original arguments.
//
// NOTE(review): hook_arg is never released in this function; if pthread_create fails,
// HookThreadStart presumably never receives it — check whether that path leaks.
int ThreadHooker::HookThreadCreate(pthread_t *tidp, const pthread_attr_t *attr,
                                   void *(*start_rtn)(void *), void *arg) {
  if (!hookEnabled() || start_rtn == nullptr) {
    return pthread_create(tidp, attr, start_rtn, arg);
  }

  auto begin_ns = Util::CurrentTimeNs();
  koom::Log::info(thread_tag, "HookThreadCreate");
  auto *wrapped = new StartRtnArg(arg, Util::CurrentTimeNs(), start_rtn);
  auto *create_arg = wrapped->thread_create_arg;

  void *current = koom::CallStack::GetCurrentThread();
  if (current != nullptr) {
    koom::CallStack::JavaStackTrace(current, create_arg->java_stack);
  }
  koom::CallStack::FastUnwind(create_arg->pc,
                              koom::Constant::kMaxCallStackDepth);
  // Elapsed time from function entry up to this point.
  create_arg->stack_time = Util::CurrentTimeNs() - begin_ns;

  return pthread_create(tidp, attr,
                        reinterpret_cast<void *(*)(void *)>(HookThreadStart),
                        reinterpret_cast<void *>(wrapped));
}

HookThreadDetach

// Replacement for pthread_detach: while hooking is enabled, an ACTION_DETACH_THREAD
// message is posted to the looper before the real pthread_detach is performed.
int ThreadHooker::HookThreadDetach(pthread_t t) {
  if (hookEnabled()) {
    const int caller_tid = static_cast<int>(syscall(SYS_gettid));
    koom::Log::info(thread_tag, "HookThreadDetach c_tid:%0x", caller_tid);

    // The payload is handed to the looper, whose handle() deletes it.
    auto *payload = new HookInfo(t, Util::CurrentTimeNs());
    sHookLooper->post(ACTION_DETACH_THREAD, payload);
  }
  return pthread_detach(t);
}

HookThreadJoin

// Replacement for pthread_join: while hooking is enabled, an ACTION_JOIN_THREAD
// message is posted to the looper before the real pthread_join is performed.
int ThreadHooker::HookThreadJoin(pthread_t t, void **return_value) {
  if (hookEnabled()) {
    const int caller_tid = static_cast<int>(syscall(SYS_gettid));
    koom::Log::info(thread_tag, "HookThreadJoin c_tid:%0x", caller_tid);

    // The payload is handed to the looper, whose handle() deletes it.
    auto *payload = new HookInfo(t, Util::CurrentTimeNs());
    sHookLooper->post(ACTION_JOIN_THREAD, payload);
  }
  return pthread_join(t, return_value);
}

HookThreadExit

// Replacement for pthread_exit: while hooking is enabled, the exiting thread's id and
// name are posted to the looper as an ACTION_EXIT_THREAD message before the real
// pthread_exit is called.
void ThreadHooker::HookThreadExit(void *return_value) {
  if (!hookEnabled()) {
    pthread_exit(return_value);  // does not return
  }

  koom::Log::info(thread_tag, "HookThreadExit");
  const int exiting_tid = static_cast<int>(syscall(SYS_gettid));
  // Zero-filled 16-byte buffer that PR_GET_NAME writes the thread name into.
  char name_buf[16] = {};
  prctl(PR_GET_NAME, name_buf);

  // The payload is handed to the looper, whose handle() deletes it.
  auto *payload = new HookExitInfo(pthread_self(), exiting_tid, name_buf,
                                   Util::CurrentTimeNs());
  sHookLooper->post(ACTION_EXIT_THREAD, payload);
  pthread_exit(return_value);
}

上面hook了系统进行线程操作的函数,然后通过sHookLooper->post 的方式,把message post到一个handler里面进行处理。到这里也就印证了前面分析到的,HookLooper那一节handler的处理了。

AddCallback

回退一下,hook libs之后执行DlopenCb::GetInstance().AddCallback(DlopenCallback);
callback代码:

// Callback that InitHook() registers with DlopenCb: forwards the reported libraries
// and source code to HookLibs(). source_lib is not used here.
void ThreadHooker::DlopenCallback(std::set<std::string> &libs, int source,
                                  std::string &source_lib) {
  HookLibs(libs, source);
}


可以看到里面也是执行了HookLibs方法的代码。

总结

上述就是KOOM线程监控实现的逻辑走向,走完一遍之后,对它的整体思路有了基本的了解。但是其实还有相当多的细节值得我们深究和学习,后续会继续学习和分享。

  • 18
    点赞
  • 16
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

林树杰

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值