frameworks/base/services/core/java/com/android/server/am/LmkdConnection.java
/** * Max LMKD reply packet length in bytes * Used to hold the data for the statsd atoms logging * Must be in sync with statslog.h */ private static final int LMKD_REPLY_MAX_SIZE = 222;
// buffer to store incoming data private final ByteBuffer mInputBuf = ByteBuffer.allocate(LMKD_REPLY_MAX_SIZE);
// mutex to synchronize access to the socket //同步锁 private final Object mLmkdSocketLock = new Object();
// Input stream to parse the incoming data //接受进来的数据 private final DataInputStream mInputData = new DataInputStream( new ByteArrayInputStream(mInputBuf.array()))// object to protect mReplyBuf and to wait/notify when reply is received //定义同步锁 private final Object mReplyBufLock = new Object();// reply buffer @GuardedBy("mReplyBufLock") // 接受来自对端的数据 private ByteBuffer mReplyBuf = null;// 构造函数,获取到MessageQueue 和 LmkdConnectionListener 对象 LmkdConnection(MessageQueue msgQueue, LmkdConnectionListener listener) { mMsgQueue = msgQueue; mListener = listener; }//连接 public boolean connect() { synchronized (mLmkdSocketLock) { if (mLmkdSocket != null) { return true; } // temporary sockets and I/O streams final LocalSocket socket = openSocket(); if (socket == null) { Slog.w(TAG, "Failed to connect to lowmemorykiller, retry later"); return false; } final OutputStream ostream; final InputStream istream; try { ostream = socket.getOutputStream(); istream = socket.getInputStream(); } catch (IOException ex) { IoUtils.closeQuietly(socket); return false; } // execute onConnect callback if (mListener != null && !mListener.onConnect(ostream)) { Slog.w(TAG, "Failed to communicate with lowmemorykiller, retry later"); IoUtils.closeQuietly(socket); return false; } // connection established mLmkdSocket = socket; mLmkdOutputStream = ostream; mLmkdInputStream = istream; mMsgQueue.addOnFileDescriptorEventListener(mLmkdSocket.getFileDescriptor(), EVENT_INPUT | EVENT_ERROR, new MessageQueue.OnFileDescriptorEventListener() { public int onFileDescriptorEvents(FileDescriptor fd, int events) { return fileDescriptorEventHandler(fd, events); } } ); mLmkdSocketLock.notifyAll(); } return true; }
frameworks/base/services/core/java/com/android/server/am/LmkdStatsReporter.java
// Message sizes followed by kill-reason codes. The reason codes carry the same names and
// values as enum kill_reasons in system/memory/lmkd/statslog.h, whose comment asks that the
// two be updated together.
public static final int KILL_OCCURRED_MSG_SIZE = 80; public static final int STATE_CHANGED_MSG_SIZE = 8; private static final int PRESSURE_AFTER_KILL = 0; private static final int NOT_RESPONDING = 1; private static final int LOW_SWAP_AND_THRASHING = 2; private static final int LOW_MEM_AND_SWAP = 3; private static final int LOW_MEM_AND_THRASHING = 4; private static final int DIRECT_RECL_AND_THRASHING = 5; private static final int LOW_MEM_AND_SWAP_UTIL = 6; private static final int LOW_FILECACHE_AFTER_THRASHING = 7;
system/memory/lmkd/statslog.h
#define MAX_TASKNAME_LEN 128

/*
 * Max LMKD reply packet length in bytes
 * Notes about size calculation:
 * 4 bytes for packet type
 * 88 bytes for the LmkKillOccurred fields: memory_stat + kill_stat
 * 2 bytes for process name string size
 * MAX_TASKNAME_LEN bytes for the process name string
 *
 * Must be in sync with LmkdConnection.java
 */
#define LMKD_REPLY_MAX_SIZE 222

/* LMK_MEMORY_STATS packet payload (the recorded data) */
struct memory_stat {
    int64_t pgfault;
    int64_t pgmajfault;
    int64_t rss_in_bytes;
    int64_t cache_in_bytes;
    int64_t swap_in_bytes;
    int64_t process_start_time_ns;
};

// If you update this, also update the corresponding stats enum mapping and
// LmkdStatsReporter.java (the two must stay consistent)
enum kill_reasons {
    NONE = -1, /* To denote no kill condition */
    PRESSURE_AFTER_KILL = 0,
    NOT_RESPONDING,
    LOW_SWAP_AND_THRASHING,
    LOW_MEM_AND_SWAP,
    LOW_MEM_AND_THRASHING,
    DIRECT_RECL_AND_THRASHING,
    LOW_MEM_AND_SWAP_UTIL,
    LOW_FILECACHE_AFTER_THRASHING,
    KILL_REASON_COUNT
};
/**
 * Exchange a request/reply packets with lmkd
 *
 * When {@code repl} is null the request is only written and no reply is awaited.
 * Otherwise the calling thread blocks on the reply lock until it is notified.
 * NOTE(review): the code that fills the reply buffer and notifies is not part of this
 * excerpt; presumably it clears mReplyBuf to signal an unexpected packet - confirm.
 *
 * @param req The buffer holding the request data to be sent
 * @param repl The buffer to receive the reply
 * @return true if the request was written and, when a reply was requested, the reply
 *         buffer was still set after wake-up; false on write failure or interruption
 */
public boolean exchange(ByteBuffer req, ByteBuffer repl) {
    if (repl == null) {
        return write(req);
    }

    boolean result = false;
    // set reply buffer to user-defined one to fill it
    synchronized (mReplyBufLock) {
        mReplyBuf = repl;

        if (write(req)) {
            try {
                // wait for the reply
                mReplyBufLock.wait();
                result = (mReplyBuf != null);
            } catch (InterruptedException ie) {
                // Restore the interrupt status so callers further up the stack can still
                // observe that this thread was asked to stop.
                Thread.currentThread().interrupt();
                result = false;
            }
        }

        // reset reply buffer
        mReplyBuf = null;
    }
    return result;
}
// Sends the first buf.position() bytes of buf's backing array to the native side (lmkd).
// Returns false when the stream write throws an IOException, true otherwise.
private boolean write(ByteBuffer buf) {
    synchronized (mLmkdSocketLock) {
        boolean sent;
        try {
            mLmkdOutputStream.write(buf.array(), 0, buf.position());
            sent = true;
        } catch (IOException ex) {
            sent = false;
        }
        return sent;
    }
}
// Reads data coming from the native side (lmkd) into buf's backing array, using the whole
// array as the destination. Returns the stream's read() result, or -1 on IOException.
private int read(ByteBuffer buf) {
    synchronized (mLmkdSocketLock) {
        int received;
        try {
            received = mLmkdInputStream.read(buf.array(), 0, buf.array().length);
        } catch (IOException ex) {
            // a failed read is reported to the caller as -1
            received = -1;
        }
        return received;
    }
}
androidu/system/memory/lmkd/lmkd.cpp
做事情:
1. int init(void) 做初始化动作.
2. mp_event_psi 处理内存(memory)压力的 PSI 事件.
/*
 * Write the state_changed over the data socket to be propagated via AMS to statsd.
 * The packet goes to every connected data socket whose async event mask has the
 * LMK_ASYNC_EVENT_STAT bit set; nothing is sent when packing yields a zero length.
 */
static void stats_write_lmk_state_changed(enum lmk_state state) {
    ALOGE("stats_write_lmk_state_changed");

    LMKD_CTRL_PACKET packet_state_changed;
    const size_t packet_len = lmkd_pack_set_state_changed(packet_state_changed, state);
    if (packet_len == 0) {
        return;
    }

    for (int conn = 0; conn < MAX_DATA_CONN; conn++) {
        if (data_sock[conn].sock < 0) {
            /* slot is not connected */
            continue;
        }
        if (!(data_sock[conn].async_event_mask & (1 << LMK_ASYNC_EVENT_STAT))) {
            /* this client did not subscribe to stat events */
            continue;
        }
        ctrl_data_write(conn, (char*)packet_state_changed, packet_len);
    }
}
// The code that follows examines memory state when a PSI event arrives.
static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_params) { enum reclaim_state { NO_RECLAIM = 0, KSWAPD_RECLAIM, DIRECT_RECLAIM, }; static int64_t init_ws_refault; static int64_t prev_workingset_refault; static int64_t base_file_lru; static int64_t init_pgscan_kswapd; static int64_t init_pgscan_direct; static bool killing; static int thrashing_limit = thrashing_limit_pct; static struct zone_watermarks watermarks; static struct timespec wmark_update_tm; static struct wakeup_info wi; static struct timespec thrashing_reset_tm; static int64_t prev_thrash_growth = 0; static bool check_filecache = false; static int max_thrashing = 0; union meminfo mi; union vmstat vs; struct psi_data psi_data; struct timespec curr_tm; int64_t thrashing = 0; bool swap_is_low = false; //获取上报的level enum vmpressure_level level = (enum vmpressure_level)data; enum kill_reasons kill_reason = NONE; bool cycle_after_kill = false; enum reclaim_state reclaim = NO_RECLAIM; enum zone_watermark wmark = WMARK_NONE; char kill_desc[LINE_MAX]; bool cut_thrashing_limit = false; int min_score_adj = 0; int swap_util = 0; int64_t swap_low_threshold; long since_thrashing_reset_ms; int64_t workingset_refault_file; bool critical_stall = false; //获取当前时间 if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) { ALOGE("Failed to get current time"); return; } //记录wakeup 事件 record_wakeup_time(&curr_tm, events ? Event : Polling, &wi); //last_kill_pid_or_fd 等待被kill bool kill_pending = is_kill_pending(); // 当没有达到杀进程,时间间隔没有达到最小的kill 进程阈值时,则直接跳过,等到下次再杀。 if (kill_pending && (kill_timeout_ms == 0 || get_time_diff_ms(&last_kill_tm, &curr_tm) < static_cast<long>(kill_timeout_ms))) { /* Skip while still killing a process */ wi.skipped_wakeups++; goto no_kill; } /* * Process is dead or kill timeout is over, stop waiting. This has no effect if pidfds are * supported and death notification already caused waiting to stop. 
*/ stop_wait_for_proc_kill(!kill_pending); if (vmstat_parse(&vs) < 0) { //获取不到vmstat 里面信息 ALOGE("Failed to parse vmstat!"); return; } /* Starting 5.9 kernel workingset_refault vmstat field was renamed workingset_refault_file */ workingset_refault_file = vs.field.workingset_refault ? : vs.field.workingset_refault_file; if (meminfo_parse(&mi) < 0) { //获取不到meminfo 里面信息 ALOGE("Failed to parse meminfo!"); return; } /* Reset states after process got killed */ //进程被终止后重置状态 if (killing) { killing = false; cycle_after_kill = true; /* Reset file-backed pagecache size and refault amounts after a kill */ base_file_lru = vs.field.nr_inactive_file + vs.field.nr_active_file; init_ws_refault = workingset_refault_file; thrashing_reset_tm = curr_tm; prev_thrash_growth = 0; } /* Check free swap levels */ if (swap_free_low_percentage) { swap_low_threshold = mi.field.total_swap * swap_free_low_percentage / 100; swap_is_low = mi.field.free_swap < swap_low_threshold; } else { swap_low_threshold = 0; } /* Identify reclaim state */ if (vs.field.pgscan_direct > init_pgscan_direct) { init_pgscan_direct = vs.field.pgscan_direct; init_pgscan_kswapd = vs.field.pgscan_kswapd; reclaim = DIRECT_RECLAIM; } else if (vs.field.pgscan_kswapd > init_pgscan_kswapd) { init_pgscan_kswapd = vs.field.pgscan_kswapd; reclaim = KSWAPD_RECLAIM; } else if (workingset_refault_file == prev_workingset_refault) { /* * Device is not thrashing and not reclaiming, bail out early until we see these stats * changing */ goto no_kill; } prev_workingset_refault = workingset_refault_file; /* * It's possible we fail to find an eligible process to kill (ex. no process is * above oom_adj_min). When this happens, we should retry to find a new process * for a kill whenever a new eligible process is available. This is especially * important for a slow growing refault case. While retrying, we should keep * monitoring new thrashing counter as someone could release the memory to mitigate * the thrashing. 
Thus, when thrashing reset window comes, we decay the prev thrashing * counter by window counts. If the counter is still greater than thrashing limit, * we preserve the current prev_thrash counter so we will retry kill again. Otherwise, * we reset the prev_thrash counter so we will stop retrying. */ // since_thrashing_reset_ms = get_time_diff_ms(&thrashing_reset_tm, &curr_tm); //since_thrashing_reset_ms 内存抖动时间超过THRASHING_RESET_INTERVAL_MS if (since_thrashing_reset_ms > THRASHING_RESET_INTERVAL_MS) { long windows_passed; /* Calculate prev_thrash_growth if we crossed THRASHING_RESET_INTERVAL_MS */ prev_thrash_growth = (workingset_refault_file - init_ws_refault) * 100 / (base_file_lru + 1); windows_passed = (since_thrashing_reset_ms / THRASHING_RESET_INTERVAL_MS); /* * Decay prev_thrashing unless over-the-limit thrashing was registered in the window we * just crossed, which means there were no eligible processes to kill. We preserve the * counter in that case to ensure a kill if a new eligible process appears. */ if (windows_passed > 1 || prev_thrash_growth < thrashing_limit) { prev_thrash_growth >>= windows_passed; } /* Record file-backed pagecache size when crossing THRASHING_RESET_INTERVAL_MS */ base_file_lru = vs.field.nr_inactive_file + vs.field.nr_active_file; init_ws_refault = workingset_refault_file; thrashing_reset_tm = curr_tm; thrashing_limit = thrashing_limit_pct; } else { /* Calculate what % of the file-backed pagecache refaulted so far */ thrashing = (workingset_refault_file - init_ws_refault) * 100 / (base_file_lru + 1); } /* Add previous cycle's decayed thrashing amount */ thrashing += prev_thrash_growth; if (max_thrashing < thrashing) { max_thrashing = thrashing; } /* * Refresh watermarks once per min in case user updated one of the margins. * TODO: b/140521024 replace this periodic update with an API for AMS to notify LMKD * that zone watermarks were changed by the system software. 
*/ if (watermarks.high_wmark == 0 || get_time_diff_ms(&wmark_update_tm, &curr_tm) > 60000) { struct zoneinfo zi; if (zoneinfo_parse(&zi) < 0) { ALOGE("Failed to parse zoneinfo!"); return; } calc_zone_watermarks(&zi, &watermarks); wmark_update_tm = curr_tm; } /* Find out which watermark is breached if any */ wmark = get_lowest_watermark(&mi, &watermarks); if (!psi_parse_mem(&psi_data)) { //critical_stall 状态 critical_stall = psi_data.mem_stats[PSI_FULL].avg10 > (float)stall_limit_critical; } /* * TODO: move this logic into a separate function * Decide if killing a process is necessary and record the reason */ if (cycle_after_kill && wmark < WMARK_LOW) { /* * Prevent kills not freeing enough memory which might lead to OOM kill. * This might happen when a process is consuming memory faster than reclaim can * free even after a kill. Mostly happens when running memory stress tests. */ kill_reason = PRESSURE_AFTER_KILL; //描述 strncpy(kill_desc, "min watermark is breached even after kill", sizeof(kill_desc)); } else if (level == VMPRESS_LEVEL_CRITICAL && events != 0) { /* * Device is too busy reclaiming memory which might lead to ANR. * Critical level is triggered when PSI complete stall (all tasks are blocked because * of the memory congestion) breaches the configured threshold. 
*/ kill_reason = NOT_RESPONDING; strncpy(kill_desc, "device is not responding", sizeof(kill_desc)); } else if (swap_is_low && thrashing > thrashing_limit_pct) { /* Page cache is thrashing while swap is low */ kill_reason = LOW_SWAP_AND_THRASHING; snprintf(kill_desc, sizeof(kill_desc), "device is low on swap (%" PRId64 "kB < %" PRId64 "kB) and thrashing (%" PRId64 "%%)", mi.field.free_swap * page_k, swap_low_threshold * page_k, thrashing); /* Do not kill perceptible apps unless below min watermark or heavily thrashing */ if (wmark > WMARK_MIN && thrashing < thrashing_critical_pct) { min_score_adj = PERCEPTIBLE_APP_ADJ + 1; } check_filecache = true; } else if (swap_is_low && wmark < WMARK_HIGH) { /* Both free memory and swap are low */ kill_reason = LOW_MEM_AND_SWAP; snprintf(kill_desc, sizeof(kill_desc), "%s watermark is breached and swap is low (%" PRId64 "kB < %" PRId64 "kB)", wmark < WMARK_LOW ? "min" : "low", mi.field.free_swap * page_k, swap_low_threshold * page_k); /* Do not kill perceptible apps unless below min watermark or heavily thrashing */ if (wmark > WMARK_MIN && thrashing < thrashing_critical_pct) { min_score_adj = PERCEPTIBLE_APP_ADJ + 1; } } else if (wmark < WMARK_HIGH && swap_util_max < 100 && (swap_util = calc_swap_utilization(&mi)) > swap_util_max) { /* * Too much anon memory is swapped out but swap is not low. * Non-swappable allocations created memory pressure. */ kill_reason = LOW_MEM_AND_SWAP_UTIL; snprintf(kill_desc, sizeof(kill_desc), "%s watermark is breached and swap utilization" " is high (%d%% > %d%%)", wmark < WMARK_LOW ? "min" : "low", swap_util, swap_util_max); } else if (wmark < WMARK_HIGH && thrashing > thrashing_limit) { /* Page cache is thrashing while memory is low */ kill_reason = LOW_MEM_AND_THRASHING; snprintf(kill_desc, sizeof(kill_desc), "%s watermark is breached and thrashing (%" PRId64 "%%)", wmark < WMARK_LOW ? 
"min" : "low", thrashing); cut_thrashing_limit = true; /* Do not kill perceptible apps unless thrashing at critical levels */ if (thrashing < thrashing_critical_pct) { min_score_adj = PERCEPTIBLE_APP_ADJ + 1; } check_filecache = true; } else if (reclaim == DIRECT_RECLAIM && thrashing > thrashing_limit) { /* Page cache is thrashing while in direct reclaim (mostly happens on lowram devices) */ kill_reason = DIRECT_RECL_AND_THRASHING; snprintf(kill_desc, sizeof(kill_desc), "device is in direct reclaim and thrashing (%" PRId64 "%%)", thrashing); cut_thrashing_limit = true; /* Do not kill perceptible apps unless thrashing at critical levels */ if (thrashing < thrashing_critical_pct) { min_score_adj = PERCEPTIBLE_APP_ADJ + 1; } check_filecache = true; } else if (check_filecache) { int64_t file_lru_kb = (vs.field.nr_inactive_file + vs.field.nr_active_file) * page_k; if (file_lru_kb < filecache_min_kb) { /* File cache is too low after thrashing, keep killing background processes */ kill_reason = LOW_FILECACHE_AFTER_THRASHING; snprintf(kill_desc, sizeof(kill_desc), "filecache is low (%" PRId64 "kB < %" PRId64 "kB) after thrashing", file_lru_kb, filecache_min_kb); min_score_adj = PERCEPTIBLE_APP_ADJ + 1; } else { /* File cache is big enough, stop checking */ check_filecache = false; } } /* Kill a process if necessary */ if (kill_reason != NONE) { struct kill_info ki = { .kill_reason = kill_reason, .kill_desc = kill_desc, .thrashing = (int)thrashing, .max_thrashing = max_thrashing, }; /* Allow killing perceptible apps if the system is stalled */ if (critical_stall) { min_score_adj = 0; } //解析io psi_parse_io(&psi_data); //解析cpu psi_parse_cpu(&psi_data); int pages_freed = find_and_kill_process(min_score_adj, &ki, &mi, &wi, &curr_tm, &psi_data); if (pages_freed > 0) { killing = true; max_thrashing = 0; if (cut_thrashing_limit) { /* * Cut thrasing limit by thrashing_limit_decay_pct percentage of the current * thrashing limit until the system stops thrashing. 
*/ thrashing_limit = (thrashing_limit * (100 - thrashing_limit_decay_pct)) / 100; } } } no_kill: /* Do not poll if kernel supports pidfd waiting */ if (is_waiting_for_kill()) { /* Pause polling if we are waiting for process death notification */ poll_params->update = POLLING_PAUSE; return; } /* * Start polling after initial PSI event; * extend polling while device is in direct reclaim or process is being killed; * do not extend when kswapd reclaims because that might go on for a long time * without causing memory pressure */ if (events || killing || reclaim == DIRECT_RECLAIM) { poll_params->update = POLLING_START; } /* Decide the polling interval */ if (swap_is_low || killing) { /* Fast polling during and after a kill or when swap is low */ poll_params->polling_interval_ms = PSI_POLL_PERIOD_SHORT_MS; } else { /* By default use long intervals */ poll_params->polling_interval_ms = PSI_POLL_PERIOD_LONG_MS; } } static std::string GetCgroupAttributePath(const char* attr) { std::string path; if (!CgroupGetAttributePath(attr, &path)) { ALOGE("Unknown cgroup attribute %s", attr); } return path; }
/*
* Find one process to kill at or above the given oom_score_adj level.
* Returns size of the killed process.
*/
static int find_and_kill_process(int min_score_adj, struct kill_info *ki, union meminfo *mi,
struct wakeup_info *wi, struct timespec *tm,
struct psi_data *pd) {
int i;
int killed_size = 0;
bool lmk_state_change_start = false;
bool choose_heaviest_task = kill_heaviest_task;
for (i = OOM_SCORE_ADJ_MAX; i >= min_score_adj; i--) {
struct proc *procp;
if (!choose_heaviest_task && i <= PERCEPTIBLE_APP_ADJ) {
/*
* If we have to choose a perceptible process, choose the heaviest one to
* hopefully minimize the number of victims.
*/
choose_heaviest_task = true;
}
while (true) {
procp = choose_heaviest_task ?
proc_get_heaviest(i) : proc_adj_tail(i);
if (!procp)
break;
killed_size = kill_one_process(procp, min_score_adj, ki, mi, wi, tm, pd);
if (killed_size >= 0) {
if (!lmk_state_change_start) {
lmk_state_change_start = true;
stats_write_lmk_state_changed(STATE_START);
}
break;
}
}
if (killed_size) {
break;
}
}
if (lmk_state_change_start) {
stats_write_lmk_state_changed(STATE_STOP);
}
return killed_size;
}
//记录wakeup 事件 static void record_wakeup_time(struct timespec *tm, enum wakeup_reason reason, struct wakeup_info *wi) { wi->prev_wakeup_tm = wi->wakeup_tm; wi->wakeup_tm = *tm; if (reason == Event) { wi->last_event_tm = *tm; wi->wakeups_since_event = 0; wi->skipped_wakeups = 0; } else { wi->wakeups_since_event++; } }
static void mainloop(void) { struct event_handler_info* handler_info; struct polling_params poll_params; struct timespec curr_tm; struct epoll_event *evt; long delay = -1; poll_params.poll_handler = NULL; poll_params.paused_handler = NULL; //死循环 while (1) { struct epoll_event events[MAX_EPOLL_EVENTS]; int nevents; int i; //已经存在poll_handler时 if (poll_params.poll_handler) { bool poll_now; clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm); if (poll_params.update == POLLING_RESUME) { /* Just transitioned into POLLING_RESUME, poll immediately. */ poll_now = true; nevents = 0; } else { /* Calculate next timeout */ //计算下次timeout 时间 //计算last_poll_tm 和 curr_tm 之间的差值 delay = get_time_diff_ms(&poll_params.last_poll_tm, &curr_tm); //delay 和 polling_interval_ms 做比较,若delay小于了预设置polling_interval_ms值,则再减去delay,若小于polling_interval_ms值,则直接使用delay 值. delay = (delay < poll_params.polling_interval_ms) ? poll_params.polling_interval_ms - delay : poll_params.polling_interval_ms; /* Wait for events until the next polling timeout */ //等待epoll 事件上报. nevents = epoll_wait(epollfd, events, maxevents, delay); /* Update current time after wait */ clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm); //上报的时间间隔,大于预设的间隔。作为有效时间 poll_now = (get_time_diff_ms(&poll_params.last_poll_tm, &curr_tm) >= poll_params.polling_interval_ms); } if (poll_now) { //根据epoll上报事件,进行处理数据 call_handler(poll_params.poll_handler, &poll_params, 0); } } else { //当poll_handler没有赋值时 kill_timeout_ms 表示是否达到timeout ,判断 last_kill_pid_or_fd 是否处理 if (kill_timeout_ms && is_waiting_for_kill()) { //获取当前时间 clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm); /// delay = kill_timeout_ms - get_time_diff_ms(&last_kill_tm, &curr_tm); /* Wait for pidfds notification or kill timeout to expire */ nevents = (delay > 0) ? 
epoll_wait(epollfd, events, maxevents, delay) : 0; //判断是否超时 if (nevents == 0) { /* Kill notification timed out */ stop_wait_for_proc_kill(false); if (polling_paused(&poll_params)) { clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm); poll_params.update = POLLING_RESUME; resume_polling(&poll_params, curr_tm); } } } else { /* Wait for events with no timeout */ //若没有超时,则等待下一次事件 nevents = epoll_wait(epollfd, events, maxevents, -1); } } if (nevents == -1) { //表示信号中断 if (errno == EINTR) continue; ALOGE("epoll_wait failed (errno=%d)", errno); continue; } /* * First pass to see if any data socket connections were dropped. * Dropped connection should be handled before any other events * to deallocate data connection and correctly handle cases when * connection gets dropped and reestablished in the same epoll cycle. * In such cases it's essential to handle connection closures first. */ for (i = 0, evt = &events[0]; i < nevents; ++i, evt++) { //EPOLLHUP 表示对应的文件描述符被挂断,并且还有其他事件 if ((evt->events & EPOLLHUP) && evt->data.ptr) { ALOGI("lmkd data connection dropped"); handler_info = (struct event_handler_info*)evt->data.ptr; watchdog.start(); //将事件关闭 ctrl_data_close(handler_info->data); watchdog.stop(); } } /* Second pass to handle all other events */ for (i = 0, evt = &events[0]; i < nevents; ++i, evt++) { //EPOLLERR:表示对应的文件描述符发生错误; if (evt->events & EPOLLERR) { ALOGD("EPOLLERR on event #%d", i); } //EPOLLHUP:表示对应的文件描述符被挂断; if (evt->events & EPOLLHUP) { /* This case was handled in the first pass */ continue; } //有处理的事件,则处理 if (evt->data.ptr) { handler_info = (struct event_handler_info*)evt->data.ptr; call_handler(handler_info, &poll_params, evt->events); } } } }
// 处理事件 static void call_handler(struct event_handler_info* handler_info, struct polling_params *poll_params, uint32_t events) { struct timespec curr_tm; //开启watchdog watchdog.start(); //更新状态,初始状态设置成 POLLING_DO_NOT_CHANGE poll_params->update = POLLING_DO_NOT_CHANGE; //更新poll_params->update状态 handler_info->handler(handler_info->data, events, poll_params); clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm); if (poll_params->poll_handler == handler_info) { poll_params->last_poll_tm = curr_tm; } //获取到poll_params->update 状态值 switch (poll_params->update) { case POLLING_START: /* * Poll for the duration of PSI_WINDOW_SIZE_MS after the * initial PSI event because psi events are rate-limited * at one per sec. */ poll_params->poll_start_tm = curr_tm; poll_params->poll_handler = handler_info; break; case POLLING_PAUSE: poll_params->paused_handler = handler_info; poll_params->poll_handler = NULL; break; case POLLING_RESUME: resume_polling(poll_params, curr_tm); break; case POLLING_DO_NOT_CHANGE: if (poll_params->poll_handler && get_time_diff_ms(&poll_params->poll_start_tm, &curr_tm) > PSI_WINDOW_SIZE_MS) { /* Polled for the duration of PSI window, time to stop */ poll_params->poll_handler = NULL; } break; } watchdog.stop(); }
//移除epoll事件 static void stop_wait_for_proc_kill(bool finished) { struct epoll_event epev; //last_kill_pid_or_fd 小于0,表示 last_kill_pid_or_fd 已经处理 if (last_kill_pid_or_fd < 0) { return; } if (debug_process_killing) { struct timespec curr_tm; if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) { /* * curr_tm is used here merely to report kill duration, so this failure is not fatal. * Log an error and continue. */ ALOGE("Failed to get current time"); } if (finished) { ALOGI("Process got killed in %ldms", get_time_diff_ms(&last_kill_tm, &curr_tm)); } else { ALOGI("Stop waiting for process kill after %ldms", get_time_diff_ms(&last_kill_tm, &curr_tm)); } } if (pidfd_supported) { /* unregister fd */ if (epoll_ctl(epollfd, EPOLL_CTL_DEL, last_kill_pid_or_fd, &epev)) { // Log an error and keep going ALOGE("epoll_ctl for last killed process failed; errno=%d", errno); } maxevents--; close(last_kill_pid_or_fd); } last_kill_pid_or_fd = -1; }
/* Tells whether a handler is currently stored in poll_params->paused_handler. */
static bool polling_paused(struct polling_params *poll_params) {
    if (poll_params->paused_handler == NULL) {
        return false;
    }
    return true;
}
//从paused中恢复到事件中 static void resume_polling(struct polling_params *poll_params, struct timespec curr_tm) { poll_params->poll_start_tm = curr_tm; poll_params->poll_handler = poll_params->paused_handler; poll_params->polling_interval_ms = PSI_POLL_PERIOD_SHORT_MS; poll_params->paused_handler = NULL; }
//init函数 static int init(void) { static struct event_handler_info kernel_poll_hinfo = { 0, kernel_event_handler };
// /proc/zoneinfo struct reread_data file_data = { .filename = ZONEINFO_PATH, .fd = -1, }; struct epoll_event epev; int pidfd; int i; int ret; page_k = sysconf(_SC_PAGESIZE); if (page_k == -1) page_k = PAGE_SIZE; page_k /= 1024; //创建全局epoll文件句柄 epollfd = epoll_create(MAX_EPOLL_EVENTS); if (epollfd == -1) { ALOGE("epoll_create failed (errno=%d)", errno); return -1; } // mark data connections as not connected for (int i = 0; i < MAX_DATA_CONN; i++) { data_sock[i].sock = -1; } //打开lmkd socket文件句柄 ctrl_sock.sock = android_get_control_socket("lmkd"); if (ctrl_sock.sock < 0) { ALOGE("get lmkd control socket failed"); return -1; } ret = listen(ctrl_sock.sock, MAX_DATA_CONN); if (ret < 0) { ALOGE("lmkd control socket listen failed (errno=%d)", errno); return -1; } epev.events = EPOLLIN; ctrl_sock.handler_info.handler = ctrl_connect_handler; epev.data.ptr = (void *)&(ctrl_sock.handler_info); if (epoll_ctl(epollfd, EPOLL_CTL_ADD, ctrl_sock.sock, &epev) == -1) { ALOGE("epoll_ctl for lmkd control socket failed (errno=%d)", errno); return -1; } maxevents++; has_inkernel_module = !access(INKERNEL_MINFREE_PATH, W_OK); use_inkernel_interface = has_inkernel_module; //若/sys/module/lowmemorykiller/parameters/minfree 不能访问,则使用in-kernel 里面的接口 if (use_inkernel_interface) { ALOGI("Using in-kernel low memory killer interface"); if (init_poll_kernel()) { epev.events = EPOLLIN; epev.data.ptr = (void*)&kernel_poll_hinfo; if (epoll_ctl(epollfd, EPOLL_CTL_ADD, kpoll_fd, &epev) != 0) { ALOGE("epoll_ctl for lmk events failed (errno=%d)", errno); close(kpoll_fd); kpoll_fd = -1; } else { maxevents++; /* let the others know it does support reporting kills */ property_set("sys.lmk.reportkills", "1"); } } } else { //初始化init_monitors函数 if (!init_monitors()) { return -1; } /* let the others know it does support reporting kills */ //sys.lmk.reportkills 标记是否可以启动lmkd property_set("sys.lmk.reportkills", "1"); } for (i = 0; i <= ADJTOSLOT(OOM_SCORE_ADJ_MAX); i++) { procadjslot_list[i].next = 
&procadjslot_list[i]; procadjslot_list[i].prev = &procadjslot_list[i]; } memset(killcnt_idx, KILLCNT_INVALID_IDX, sizeof(killcnt_idx)); /* * Read zoneinfo as the biggest file we read to create and size the initial * read buffer and avoid memory re-allocations during memory pressure */ if (reread_file(&file_data) == NULL) { ALOGE("Failed to read %s: %s", file_data.filename, strerror(errno)); } /* check if kernel supports pidfd_open syscall */ pidfd = TEMP_FAILURE_RETRY(pidfd_open(getpid(), 0)); if (pidfd < 0) { pidfd_supported = (errno != ENOSYS); } else { pidfd_supported = true; close(pidfd); } ALOGI("Process polling is %s", pidfd_supported ? "supported" : "not supported" ); bool mbrain_init_success = init_mbrain(); ALOGI("Mbrain trigger success: %s", mbrain_init_success ? "success" : "fail" ); if (!lmkd_init_hook()) { ALOGE("Failed to initialize LMKD hooks."); return -1; } return 0; }
androidu/frameworks/base/services/core/java/com/android/server/am/ProcessList.java
做事:
1. writeLmkd
// Sends a request to lmkd, first waiting for the connection to come up when necessary.
private static boolean writeLmkd(ByteBuffer buf, ByteBuffer repl) {
    if (sLmkdConnection.isConnected()) {
        // already connected: hand the buffers to LmkdConnection.exchange right away
        return sLmkdConnection.exchange(buf, repl);
    }

    // try to connect immediately and then keep retrying
    sKillHandler.sendMessage(sKillHandler.obtainMessage(KillHandler.LMKD_RECONNECT_MSG));

    // wait for connection retrying 3 times (up to 3 seconds)
    final boolean connected =
            sLmkdConnection.waitForConnection(3 * LMKD_RECONNECT_DELAY_MS);
    return connected && sLmkdConnection.exchange(buf, repl);
}
/**
 * Handle the unsolicited message from zygote.
 *
 * Listens for event data coming from zygote: on EVENT_INPUT the message is read from
 * {@code fd} and, when it parses as a complete SIGCHLD record, forwarded to the app
 * exit info tracker. Any exception while reading is logged and otherwise ignored.
 *
 * @param fd the descriptor the event was reported on
 * @param events bit mask of the reported events
 * @return always EVENT_INPUT
 */
private int handleZygoteMessages(FileDescriptor fd, int events) {
    final int eventFd = fd.getInt$();
    if ((events & EVENT_INPUT) == 0) {
        return EVENT_INPUT;
    }

    // An incoming message from zygote
    try {
        final int len = Os.read(fd, mZygoteUnsolicitedMessage, 0,
                mZygoteUnsolicitedMessage.length);
        final boolean isSigChld = len > 0
                && mZygoteSigChldMessage.length == Zygote.nativeParseSigChld(
                        mZygoteUnsolicitedMessage, len, mZygoteSigChldMessage);
        if (isSigChld) {
            mAppExitInfoTracker.handleZygoteSigChld(
                    mZygoteSigChldMessage[0] /* pid */,
                    mZygoteSigChldMessage[1] /* uid */,
                    mZygoteSigChldMessage[2] /* status */);
        }
    } catch (Exception e) {
        Slog.w(TAG, "Exception in reading unsolicited zygote message: " + e);
    }
    return EVENT_INPUT;
}
system/memory/lmkd/libpsi/psi.cpp
做事:
1. init_psi_monitor 初始化psi 监控.
2. main 入口函数 ,watchdog 初始化.
3. mainloop函数
#define PSI_MON_FILE_MEMORY "/proc/pressure/memory"//监听psi int init_psi_monitor(enum psi_stall_type stall_type, int threshold_us, int window_us) { int fd; int res; char buf[256]; //打开/proc/pressure/memory 文件 fd = TEMP_FAILURE_RETRY(open(PSI_MON_FILE_MEMORY, O_WRONLY | O_CLOEXEC)); if (fd < 0) { ALOGE("No kernel psi monitor support (errno=%d)", errno); return -1; } switch (stall_type) { case (PSI_SOME): case (PSI_FULL): res = snprintf(buf, sizeof(buf), "%s %d %d", stall_type_name[stall_type], threshold_us, window_us); break; default: ALOGE("Invalid psi stall type: %d", stall_type); errno = EINVAL; goto err; } //超过buf的限制 if (res >= (ssize_t)sizeof(buf)) { ALOGE("%s line overflow for psi stall type '%s'", PSI_MON_FILE_MEMORY, stall_type_name[stall_type]); errno = EINVAL; goto err; } res = TEMP_FAILURE_RETRY(write(fd, buf, strlen(buf) + 1)); if (res < 0) { ALOGE("%s write failed for psi stall type '%s'; errno=%d", PSI_MON_FILE_MEMORY, stall_type_name[stall_type], errno); goto err; } return fd; err: close(fd); return -1; }int register_psi_monitor(int epollfd, int fd, void* data) { int res; struct epoll_event epev; epev.events = EPOLLPRI; epev.data.ptr = data; //将fd 事件关联起来,新增描述符 res = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &epev); if (res < 0) { ALOGE("epoll_ctl for psi monitor failed; errno=%d", errno); } return res; }
//主函数 int main(int argc, char **argv) { if ((argc > 1) && argv[1] && !strcmp(argv[1], "--reinit")) { if (property_set(LMKD_REINIT_PROP, "")) { ALOGE("Failed to reset " LMKD_REINIT_PROP " property"); } return issue_reinit(); } //更新prop 一系列和lmk相关的prop值 if (!update_props()) { ALOGE("Failed to initialize props, exiting."); return -1; } //eventlog ctx = create_android_logger(KILLINFO_LOG_TAG); //初始化 if (!init()) { if (!use_inkernel_interface) { /* * MCL_ONFAULT pins pages as they fault instead of loading * everything immediately all at once. (Which would be bad, * because as of this writing, we have a lot of mapped pages we * never use.) Old kernels will see MCL_ONFAULT and fail with * EINVAL; we ignore this failure. * * N.B. read the man page for mlockall. MCL_CURRENT | MCL_ONFAULT * pins ⊆ MCL_CURRENT, converging to just MCL_CURRENT as we fault * in pages. */ /* CAP_IPC_LOCK required */ //锁住该实时进程在物理内存上全部地址空间。这将阻止Linux将这个内存页调度到交换空间(swap space), // 及时该进程已有一段时间没有访问这段空间。 if (mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT) && (errno != EINVAL)) { ALOGW("mlockall failed %s", strerror(errno)); } /* CAP_NICE required */ struct sched_param param = { .sched_priority = 1, }; //调度模式先进先出 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { ALOGW("set SCHED_FIFO failed %s", strerror(errno)); } } if (init_reaper()) { ALOGI("Process reaper initialized with %d threads in the pool", reaper.thread_cnt()); } //watch dog 初始化 if (!watchdog.init()) { ALOGE("Failed to initialize the watchdog"); } //循环等待 mainloop(); } android_log_destroy(&ctx); ALOGI("exiting"); return 0; }
方案记录:
1.mem_free:剩余可分配内存大小,每个策略对应不同的剩余内存大小阈值
2. pgscan_kswapd:间接回收的内存页数,通过前后两次的数量差和时间差,计算出单位时间内kswapd回收的内存页数,来表示kswapd的活跃状态
3. swap_free:Swap的剩余大小,每个策略对应不同的交换分区内存大小阈值
4. pgscan_direct:直接回收的内存页数,通过前后两次的数量差和时间差,计算出单位时间内直接回收内存页数,来表示内存的压力情况
5. mem_cache
6. mem_available_kb
7. mem_total_kb
8. wmark
9. swap_total
策略记录:
1. 当CPU 占有率超过 85% 开始杀. adj >=900
2. 使用kill进程的方法,不是forcestop 方法。
获取CPU 占用率
计算CPU 占用率:
安卓cpu信息查看与cpu占用率计算_android cpu 占用率-CSDN博客
代码:
1.androidu/system/memory/lmkd/include/lmkd.h
enum lmk_cmd {
LMK_TARGET = 0, /* Associate minfree with oom_adj_score */
LMK_PROCPRIO, /* Register a process and set its oom_adj_score */
LMK_PROCREMOVE, /* Unregister a process */
LMK_PROCPURGE, /* Purge all registered processes */
LMK_GETKILLCNT, /* Get number of kills */
LMK_SUBSCRIBE, /* Subscribe for asynchronous events */
LMK_PROCKILL, /* Unsolicited msg to subscribed clients on proc kills */
LMK_UPDATE_PROPS, /* Reinit properties */
LMK_STAT_KILL_OCCURRED, /* Unsolicited msg to subscribed clients on proc kills for statsd log */
LMK_STAT_STATE_CHANGED, /* Unsolicited msg to subscribed clients on state changed */
+ LMK_STAT_LEVEL_CHANGED,
};
-#define MAX_TARGETS 6
+#define MAX_TARGETS 16
+/* LMK_LEVEL packet payload: memory snapshot filled in by mp_event_psi and packed by lmkd_pack_set_level_changed (each field goes through htonl, i.e. is sent as one 32-bit word). NOTE(review): the sender multiplies only the free_* values by page_k; the cached/available/total values are copied from union meminfo unscaled - confirm their unit really is kB. */
+struct lmk_level_data {
+ int level; /* index used with level_name[] by the sender */
+ int64_t free_swap_kb; /* get_free_swap(&mi) * page_k */
+ int64_t free_mem_kb; /* mi.field.nr_free_pages * page_k */
+ int64_t pgscan_kswapd; /* vs.field.pgscan_kswapd */
+ int64_t pgscan_direct; /* vs.field.pgscan_direct */
+ int64_t mem_cache_kb; /* mi.field.cached, copied unscaled */
+ int64_t mem_available_kb; /* mi.field.nr_mem_available, copied unscaled */
+ int64_t mem_total_kb; /* mi.field.mem_total_kb, copied unscaled */
+ int64_t swap_total_kb; /* mi.field.total_swap, copied unscaled */
+ int wmark; /* wmark value from mp_event_psi */
+};
+/*
+ * Packs an LMK_STAT_LEVEL_CHANGED notification into packet.
+ *
+ * Layout: the command code followed by ten 32-bit words in network byte
+ * order (level, free_swap_kb, free_mem_kb, pgscan_kswapd, pgscan_direct,
+ * mem_cache_kb, mem_available_kb, mem_total_kb, swap_total_kb, wmark).
+ * The 64-bit fields are deliberately narrowed to their low 32 bits, which is
+ * what a receiver reading eleven ints expects.
+ *
+ * Returns the number of bytes written, i.e. eleven words. The previous
+ * formula (3 ints + 8 int64_t) claimed 76 bytes although only 44 were filled,
+ * so uninitialised packet contents were sent after the payload.
+ */
+static inline size_t lmkd_pack_set_level_changed(LMKD_CTRL_PACKET packet,struct lmk_level_data data) {
+    packet[0] = htonl(LMK_STAT_LEVEL_CHANGED);
+    packet[1] = htonl(data.level);
+    packet[2] = htonl((uint32_t)data.free_swap_kb);
+    packet[3] = htonl((uint32_t)data.free_mem_kb);
+    packet[4] = htonl((uint32_t)data.pgscan_kswapd);
+    packet[5] = htonl((uint32_t)data.pgscan_direct);
+    packet[6] = htonl((uint32_t)data.mem_cache_kb);
+    packet[7] = htonl((uint32_t)data.mem_available_kb);
+    packet[8] = htonl((uint32_t)data.mem_total_kb);
+    packet[9] = htonl((uint32_t)data.swap_total_kb);
+    packet[10] = htonl(data.wmark);
+    /* Exactly eleven 32-bit words were written above. */
+    return 11 * sizeof(int);
+}
2. androidu/system/memory/lmkd/lmkd.cpp
/* Fields to parse in /proc/meminfo */
enum meminfo_field {
- MI_NR_FREE_PAGES = 0,
+ MI_MEM_TOTAL = 0,
+ MI_NR_FREE_PAGES,
+ MI_MEM_AVAILABLE,
MI_CACHED,
MI_SWAP_CACHED,
MI_BUFFERS,
static const char* const meminfo_field_names[MI_FIELD_COUNT] = {
+ "MemTotal:",
"MemFree:",
+ "MemAvailable:",
"Cached:",
"SwapCached:",
"Buffers:",
union meminfo {
struct {
+ int64_t mem_total_kb;
int64_t nr_free_pages;
+ int64_t nr_mem_available;
int64_t cached;
int64_t swap_cached;
int64_t buffers;
@@ -770,6 +776,7 @@ static ssize_t ctrl_data_read(int dsock_idx, char* buf, size_t bufsz, struct ucr
}
static int ctrl_data_write(int dsock_idx, char* buf, size_t bufsz) {
+ ALOGE("ctrl_data_write");
int ret = 0;
ret = TEMP_FAILURE_RETRY(write(data_sock[dsock_idx].sock, buf, bufsz));
@@ -789,6 +796,7 @@ static int ctrl_data_write(int dsock_idx, char* buf, size_t bufsz) {
* will receive this unsolicited notification.
*/
static void ctrl_data_write_lmk_kill_occurred(pid_t pid, uid_t uid) {
+ ALOGE("ctrl_data_write_lmk_kill_occurred pid:%i ,uid:%i", pid,uid);
LMKD_CTRL_PACKET packet;
size_t len = lmkd_pack_set_prockills(packet, pid, uid);
@@ -799,11 +807,26 @@ static void ctrl_data_write_lmk_kill_occurred(pid_t pid, uid_t uid) {
}
}
+/* Packs data as a level-changed packet and writes it to every connected data socket whose async_event_mask has the LMK_ASYNC_EVENT_KILL bit set. NOTE(review): this reuses the kill-event subscription bit for level updates - confirm that is intended. */
+static void ctrl_data_write_lmk_level_changed(lmk_level_data data) {
+ ALOGE("ctrl_data_write_lmk_level_changed"); /* trace log, emitted at error severity on every call */
+ LMKD_CTRL_PACKET packet;
+ size_t len = lmkd_pack_set_level_changed(packet,data); /* number of bytes to send */
+
+ for (int i = 0; i < MAX_DATA_CONN; i++) {
+ if (data_sock[i].sock >= 0 && data_sock[i].async_event_mask & 1 << LMK_ASYNC_EVENT_KILL) { /* open socket that subscribed to kill events */
+ ctrl_data_write(i, (char*)packet, len);
+ }
+ }
+}
+
/*
* Write the kill_stat/memory_stat over the data socket to be propagated via AMS to statsd
*/
static void stats_write_lmk_kill_occurred(struct kill_stat *kill_st,
struct memory_stat *mem_st) {
+ ALOGE("stats_write_lmk_kill_occurred");
LMK_KILL_OCCURRED_PACKET packet;
const size_t len = lmkd_pack_set_kill_occurred(packet, kill_st, mem_st);
if (len == 0) {
@@ -830,6 +853,7 @@ static void stats_write_lmk_kill_occurred_pid(int pid, struct kill_stat *kill_st
* Write the state_changed over the data socket to be propagated via AMS to statsd
*/
static void stats_write_lmk_state_changed(enum lmk_state state) {
+ ALOGE("stats_write_lmk_state_changed");
LMKD_CTRL_PACKET packet_state_changed;
const size_t len = lmkd_pack_set_state_changed(packet_state_changed, state);
if (len == 0) {
@@ -1473,6 +1497,7 @@ static void cmd_target(int ntargets, LMKD_CTRL_PACKET packet) {
}
static void ctrl_command_handler(int dsock_idx) {
+ ALOGE("ctrl_command_handler");
LMKD_CTRL_PACKET packet;
struct ucred cred;
int len;
@@ -1492,6 +1517,7 @@ static void ctrl_command_handler(int dsock_idx) {
}
cmd = lmkd_pack_get_cmd(packet);
+ ALOGE("ctrl_command_handler command code %d", cmd);
nargs = len / sizeof(int) - 1;
if (nargs < 0)
goto wronglen;
@@ -1578,6 +1604,7 @@ wronglen:
static void ctrl_data_handler(int data, uint32_t events,
struct polling_params *poll_params __unused) {
+ ALOGI("ctrl_data_handler");
if (events & EPOLLIN) {
static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_params) {
+
+ ALOGE("mp_event_psi");
if (!psi_parse_mem(&psi_data)) {
critical_stall = psi_data.mem_stats[PSI_FULL].avg10 > (float)stall_limit_critical;
}
+
+ //上报:mem_free, swap_free, pgscan_kswpad, pgscan_direct
+
+ ALOGE("mp_event_psi level:%s,free_swap_kb:%ld,free_mem_kb:%ld,pgscan_kswapd:%ld,pgscan_direct:%ld",level_name[level],get_free_swap(&mi) * page_k,mi.field.nr_free_pages * page_k,vs.field.pgscan_kswapd,vs.field.pgscan_direct);
+
+
+ ALOGE("mp_event_psi level:%s,page_k:%ld,mem_cache:%ld,mem_available_kb:%ld,mem_total_kb:%ld,swap_total:%ld,wmark:%d",level_name[level],page_k,mi.field.cached,mi.field.nr_mem_available,mi.field.mem_total_kb,mi.field.total_swap,wmark);
+
+ struct lmk_level_data level_data;
+ level_data.level = level;
+ level_data.free_swap_kb = get_free_swap(&mi) * page_k;
+ level_data.free_mem_kb = mi.field.nr_free_pages * page_k;
+ level_data.pgscan_kswapd = vs.field.pgscan_kswapd;
+ level_data.pgscan_direct = vs.field.pgscan_direct;
+ level_data.mem_cache_kb = mi.field.cached;
+ level_data.mem_available_kb = mi.field.nr_mem_available;
+ level_data.mem_total_kb = mi.field.mem_total_kb;
+ level_data.swap_total_kb = mi.field.total_swap;
+ level_data.wmark = wmark;
+ //if(level_changed){
+ ctrl_data_write_lmk_level_changed(level_data);
+ //}
3. androidu/frameworks/base/services/core/java/com/android/server/am/LmkdConnection.java
- private static final String TAG = TAG_WITH_CLASS_NAME ? "LmkdConnection" : TAG_AM;
+ private static final String TAG = "LmkdConnection";
4. androidu/frameworks/base/services/core/java/com/android/server/am/ProcessList.java
+import java.io.FileReader;
+import java.io.BufferedReader;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
- static final String TAG = TAG_WITH_CLASS_NAME ? "ProcessList" : TAG_AM;
+ static final String TAG = "ProcessList";
static final byte LMK_STATE_CHANGED = 9; // Msg to subscribed clients on state changed
+ static final byte LMK_LEVEL_CHANGED = 10;
LmkdStatsReporter.logStateChanged(state);
return true;
+ case LMK_LEVEL_CHANGED:
+ Slog.i(TAG, "handleUnsolicitedMessage LMK_LEVEL_CHANGED");
+ int level = inputData.readInt();
+ long freeSwapKb = inputData.readInt();
+ long freeMemKb = inputData.readInt();
+ long pgscanKswapd = inputData.readInt();
+ long pgscanDirect = inputData.readInt();
+ long memCacheKb = inputData.readInt();
+ long memAvailableKb = inputData.readInt();
+ long memTotalKb = inputData.readInt();
+ long swapTotalKb = inputData.readInt();
+ long wmark = inputData.readInt();
+
+
+ Slog.i(TAG, "handleUnsolicitedMessage LMK_LEVEL_CHANGED level:" + level +",freeSwapKb:"+freeSwapKb +",freeMemKb:"+freeMemKb+",pgscanKswapd:"+pgscanKswapd+",pgscanDirect:"+pgscanDirect+",memCacheKb:"+memCacheKb+",memAvailableKb:"+memAvailableKb+",memTotalKb:"+memTotalKb+",swapTotalKb:"+swapTotalKb+",wmark:"+wmark);
+
+ Slog.i(TAG, "handleUnsolicitedMessage LMK_LEVEL_CHANGED cpu rate:" + getCPURate());
+ return true;
default:
return false;
+
+    /**
+     * Walks a snapshot of the LRU process list and filters out processes that must not be
+     * killed: those without an attached thread, crashing or not-responding ones, and those
+     * whose set adj is below {@code minAdj} (their package is added to {@code skipPkgSet} and
+     * dropped from {@code killTargetMap}).
+     *
+     * <p>Work in progress: nothing is killed or returned yet.
+     * NOTE(review): {@code minAdj}, {@code skipPkgSet}, {@code killTargetMap}, {@code DEBUG}
+     * and {@code DEBUG_MORE} are not declared in this patch excerpt - confirm they exist in
+     * the enclosing class.
+     */
+    private void getKillApp() {
+        // Cache the currently running processes so the scan below runs without the lock.
+        ArrayList<ProcessRecord> procs = new ArrayList<>();
+        synchronized (mService) {
+            procs.addAll(mService.mProcessList.getLruProcessesLOSP());
+        }
+
+        for (int i = 0; i < procs.size(); i++) {
+            ProcessRecord app = procs.get(i);
+            // NOTE(review): spelled mErrorState / mState to match ProcessRecord - confirm.
+            final ProcessErrorStateRecord errState = app.mErrorState;
+            if (app.getThread() == null) {
+                Slog.w(TAG,"skip"+ app.processName + " by app.getThread( ) == null");
+                continue;
+            }
+            if (errState.isCrashing()) {
+                Slog.w(TAG,"skip" + app.processName + " by crashing");
+                continue;
+            }
+            if (errState.isNotResponding()) {
+                Slog.w(TAG,"skip"+ app.processName +"by NotResponding");
+                continue;
+            }
+
+            ApplicationInfo appInfo = app.info;
+            ProcessStateRecord state = app.mState;
+            int appAdj = state.getSetAdj();
+            if (DEBUG) {
+                Slog.d(TAG,"checking process:" + app.processName + ",adj:" + appAdj + ", state:" + state.getCurProcState()
+                        +",adjtype:"+ state.getAdjType() + ",cached:"+ state.isCached());
+            }
+
+            if (appAdj < minAdj) {
+                // Process adj is lower than minAdj: this package should not be killed.
+                skipPkgSet.add(appInfo.packageName);
+                if (killTargetMap.containsKey(appInfo.packageName)) {
+                    killTargetMap.remove(appInfo.packageName);
+                    if (DEBUG_MORE) {
+                        Slog.d(TAG,"skip pkg, adj: " + appAdj + "procName:" + app.processName + "pkgname:" + appInfo.packageName);
+                    }
+                    continue;
+                }
+            }
+        }
+    }
+
+
+
+ /**
+ * ^ 表示匹配行的开头。
+ * cpu 匹配 "cpu" 这个单词。
+ * \\s+ 匹配一个或多个空格字符。
+ * (\\d+\\s+){9} 匹配由一个或多个数字加上一个或多个空格字符组成的序列,重复9次。
+ * \\d+ 匹配一个或多个数字。
+ * $ 匹配行的结尾。 因此,整个正则表达式可以匹配以 "cpu" 开头,后面跟着10个由空格分隔的数字的行。
+ */
+private static String getCPURate() {
+ String path = "/proc/stat";// 系统CPU信息文件
+ long Totaljiffies[] = new long[2];
+ long totalIdle[] = new long[2];
+ FileReader fileReader = null;
+ BufferedReader bufferedReader = null;
+ Pattern pattern = Pattern.compile("^cpu\\s+(\\d+\\s+){9}\\d+$", Pattern.MULTILINE);
+ //正则表达式,只获取第一行
+ for (int i = 0; i < 2; i++) { //每一次调用分为两次获取 方便求差
+ Totaljiffies[i] = 0;
+ totalIdle[i] = 0;
+ try {
+ fileReader = new FileReader(path);
+ bufferedReader = new BufferedReader(fileReader, 8192);
+ String str;
+ while ((str = bufferedReader.readLine()) != null) { //读取stat信息
+ if (str.toLowerCase().startsWith("cpu")) {//以cpu开头的
+ Matcher matcher = pattern.matcher(str);//直接获取第一行cpu开头的数据
+ // 不需要cpu0-7的,那样的话还得多几步运算
+ while (matcher.find()) {
+ String[] values = extractValues(matcher.group());
+ Totaljiffies[i] = sumValues(values);
+ totalIdle[i] = Long.parseLong(values[3]);
+ }
+ }
+ if(i==0){//第一次获取后进行延时等待系统更新信息
+ try {
+ Thread.sleep(100);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+ } catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } finally {
+ if (bufferedReader != null) {
+ try {
+ bufferedReader.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+ }
+ double rate = 0;
+ if (Totaljiffies[1] > Totaljiffies[0] ) {//正常情况下第二次总的jiffies一定比第一次获得的数据大
+ rate = 1.0 * ((Totaljiffies[1] - totalIdle[1]) - (Totaljiffies[0] - totalIdle[0]))
+ / (Totaljiffies[1] - Totaljiffies[0]);
+ }
+ return String.valueOf(rate);
+ }
+    /**
+     * Splits a whitespace-separated line and drops its first token (the "cpu" label).
+     *
+     * @param input the line to split; surrounding whitespace is ignored
+     * @return the tokens after the first one; empty when the line is blank or has one token
+     */
+    public static String[] extractValues(String input) {
+        final String trimmed = input.trim();
+        if (trimmed.isEmpty()) {
+            // A blank line has no label to drop; avoids a negative array size below.
+            return new String[0];
+        }
+        String[] parts = trimmed.split("\\s+");
+        String[] values = new String[parts.length - 1]; // drop "cpu", hence one shorter
+        System.arraycopy(parts, 1, values, 0, parts.length - 1);
+        return values;
+    }
+ /**
+ * 求数组和
+ * @param input
+ * @return
+ */
+ public static Long sumValues(String input[]) {
+ Long sum = Long.valueOf(0);
+ for (String value : input) {
+ sum += Integer.parseInt(value);
+ }
+ return sum;
+ }
5. androidu/system/core/rootdir/init.rc
修改权限:
# make the PSI monitor accessible to others
chown system system /proc/pressure/cpu
chmod 0664 /proc/pressure/cpu
应用待机分组:
frameworks/base/core/java/android/app/usage/UsageStatsManager.java
getAppStandbyBucket(String packageName)
/** * The app was used very recently, currently in use or likely to be used very soon. Standby * bucket values that are ≤ {@link #STANDBY_BUCKET_ACTIVE} will not be throttled by the * system while they are in this bucket. Buckets > {@link #STANDBY_BUCKET_ACTIVE} will most * likely be restricted in some way. For instance, jobs and alarms may be deferred. * @see #getAppStandbyBucket() */ //应用目前正在使用中,或者最近刚刚使用过 public static final int STANDBY_BUCKET_ACTIVE = 10; /** * The app was used recently and/or likely to be used in the next few hours. Restrictions will * apply to these apps, such as deferral of jobs and alarms. * @see #getAppStandbyBucket() */ //应用会定期使用 public static final int STANDBY_BUCKET_WORKING_SET = 20; /** * The app was used in the last few days and/or likely to be used in the next few days. * Restrictions will apply to these apps, such as deferral of jobs and alarms. The delays may be * greater than for apps in higher buckets (lower bucket value). Bucket values > * {@link #STANDBY_BUCKET_FREQUENT} may additionally have network access limited. * @see #getAppStandbyBucket() */ //应用会经常使用,但不会每天使用 public static final int STANDBY_BUCKET_FREQUENT = 30; /** * The app has not be used for several days and/or is unlikely to be used for several days. * Apps in this bucket will have more restrictions, including network restrictions, except * during certain short periods (at a minimum, once a day) when they are allowed to execute * jobs, access the network, etc. * @see #getAppStandbyBucket() */ //应用不经常使用 public static final int STANDBY_BUCKET_RARE = 40; /** * The app has not be used for several days, is unlikely to be used for several days, and has * been misbehaving in some manner. * Apps in this bucket will have the most restrictions, including network restrictions and * additional restrictions on jobs. * <p> Note: this bucket is not enabled in {@link Build.VERSION_CODES#R}. 
* @see #getAppStandbyBucket() */ //受限制 public static final int STANDBY_BUCKET_RESTRICTED = 45; /** * The app has never been used. * {@hide} */ @SystemApi //已安装但从未运行过的应用,系统会对这些应用施加严格的限制 public static final int STANDBY_BUCKET_NEVER = 50;
计算CPU 占有率:
/**
 * Measures overall CPU utilisation from two samples of the aggregate "cpu" line of
 * /proc/stat taken 100 ms apart.
 *
 * <p>The pattern matches a line that starts with "cpu", followed by whitespace and exactly
 * ten whitespace-separated numbers, so the per-core lines (cpu0, cpu1, ...) are ignored.
 *
 * @return the busy fraction (0.0 - 1.0) as a string, or "0.0" when either sample could not
 *         be read or the counters did not advance
 */
private static String getCPURate() {
    final String path = "/proc/stat"; // system-wide CPU statistics
    final long[] totalJiffies = new long[2];
    final long[] totalIdle = new long[2];
    final Pattern pattern = Pattern.compile("^cpu\\s+(\\d+\\s+){9}\\d+$", Pattern.MULTILINE);

    for (int i = 0; i < 2; i++) { // two samples, so a delta can be computed
        try (BufferedReader reader = new BufferedReader(new FileReader(path), 8192)) {
            String line;
            while ((line = reader.readLine()) != null) {
                Matcher matcher = pattern.matcher(line);
                if (matcher.find()) {
                    String[] values = extractValues(matcher.group());
                    totalJiffies[i] = sumValues(values);
                    totalIdle[i] = Long.parseLong(values[3]); // 4th field is idle time
                    break; // only the aggregate line is needed
                }
            }
        } catch (IOException | NumberFormatException e) {
            e.printStackTrace();
        }
        if (i == 0) {
            // Wait once between the two samples (not once per line read) so the
            // kernel counters can advance.
            try {
                Thread.sleep(100);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt(); // keep the interrupt visible to callers
            }
        }
    }

    double rate = 0;
    // Both samples must be valid and the second total must be larger than the first.
    if (totalJiffies[0] > 0 && totalJiffies[1] > totalJiffies[0]) {
        rate = 1.0 * ((totalJiffies[1] - totalIdle[1]) - (totalJiffies[0] - totalIdle[0]))
                / (totalJiffies[1] - totalJiffies[0]);
    }
    return String.valueOf(rate);
}
/**
 * Splits a whitespace-separated line and drops its first token (the "cpu" label).
 *
 * @param input the line to split; surrounding whitespace is ignored
 * @return the tokens after the first one; empty when the line is blank or has one token
 */
public static String[] extractValues(String input) {
    final String trimmed = input.trim();
    if (trimmed.isEmpty()) {
        // A blank line has no label to drop; avoids a negative array size below.
        return new String[0];
    }
    String[] parts = trimmed.split("\\s+");
    String[] values = new String[parts.length - 1]; // drop "cpu", hence one shorter
    System.arraycopy(parts, 1, values, 0, parts.length - 1);
    return values;
}
/**
 * Sums the decimal numbers in the given array.
 *
 * @param input decimal strings, each within the range of a {@code long}
 * @return the sum of all elements; 0 for an empty array
 * @throws NumberFormatException if an element is not a valid long
 */
public static Long sumValues(String input[]) {
    long sum = 0L; // primitive accumulator, boxed once on return
    for (String value : input) {
        // Jiffies counters can exceed Integer.MAX_VALUE, so parse as long.
        sum += Long.parseLong(value);
    }
    return sum;
}
一、epoll_wait函数
1.1、函数定义
int epoll_wait(int epfd, struct epoll_event * events, int maxevents, int timeout);
作用: 等待监听的所有fd相应事件的产生。
1.2、参数详解:
1) int epfd: epoll_create()函数返回的epoll实例的句柄。
2) struct epoll_event * events: 接口的返回参数,epoll把发生的事件的集合从内核复制到 events数组中。events数组是一个用户分配好大小的数组,数组长度大于等于maxevents。(events不可以是空指针,内核只负责把数据复制到这个 events数组中,不会去帮助我们在用户态中分配内存)
3) int maxevents: 表示本次可以返回的最大事件数目,通常maxevents参数与预分配的events数组的大小是相等的。
4) int timeout: 表示在没有检测到事件发生时最多等待的时间,超时时间(>=0),单位是毫秒ms,-1表示阻塞,0表示不阻塞。
1.3、返回值:
成功时返回就绪(需要处理)的事件数目;返回0表示等待超时,期间没有事件发生;失败时返回-1,并设置errno。
二. epoll事件延迟:
在Linux中,epoll 本身并没有优先级的概念,因为 epoll 是一种事件通知机制,它主要用于高效地处理大量文件描述符上的 I/O 事件。但是,您可以通过一些方法来提高 epoll 监控的文件描述符的优先级或处理速度,以改善整体性能:
- 优化事件处理逻辑:确保事件处理逻辑高效,避免阻塞和耗时的操作。尽量减少事件处理器中的计算量,确保事件得到及时处理。
- 合理设置超时时间:在调用 epoll_wait 时,合理设置超时时间,以便在有事件发生时及时返回,避免不必要的等待。
- 合理设置触发模式:根据实际需求选择合适的触发模式(默认的水平触发,或通过 EPOLLET 标志启用的边缘触发),以提高事件的响应速度。注意 EPOLLIN、EPOLLOUT 是事件类型,而不是触发模式。
- 使用多线程/进程:将 epoll 监控的文件描述符的处理逻辑放在独立的线程或进程中,以提高并发处理能力。
- 系统优化:对系统进行优化,包括调整内核参数、增加系统资源等,以提高整体性能。
- 使用更高级别的框架:考虑使用更高级别的框架或库,如 libevent、Boost.Asio 等,它们可以在 epoll 的基础上提供更加方便和高效的事件处理机制。
- 性能分析和调优:使用性能分析工具对程序进行分析,找出性能瓶颈并进行优化,以提高 epoll 的处理速度。
需要注意的是,尽管可以通过上述方法来提高 epoll 监控的文件描述符的处理速度和效率,但是 epoll 本身并没有优先级的概念。您应该根据具体的应用场景和需求,结合上述建议来优化 epoll 的性能。