LMKD 调试记录

惠(xi)斌

已于 2024-07-12 10:33:29 修改

阅读量777

点赞数 15

文章标签： python 开发语言

于 2024-06-03 18:00:42 首次发布

本文链接：https://blog.csdn.net/huibin147852369/article/details/139075510

版权

frameworks/base/services/core/java/com/android/server/am/LmkdConnection.java

    /**
     * Max LMKD reply packet length in bytes
     * Used to hold the data for the statsd atoms logging
     * Must be in sync with statslog.h
     */
    private static final int LMKD_REPLY_MAX_SIZE = 222;

    // buffer to store incoming data; sized to hold one maximum-length reply packet
    private final ByteBuffer mInputBuf =
            ByteBuffer.allocate(LMKD_REPLY_MAX_SIZE);

    // mutex to synchronize access to the socket
    // (used as the monitor for connect(), write() and read())
    private final Object mLmkdSocketLock = new Object();

    // Input stream to parse the incoming data
    //接受进来的数据
    private final DataInputStream mInputData = new DataInputStream(
            new ByteArrayInputStream(mInputBuf.array()))

   // object to protect mReplyBuf and to wait/notify when reply is received
    // (monitor object; exchange() synchronizes and waits on it)
    private final Object mReplyBufLock = new Object();

    // reply buffer
    @GuardedBy("mReplyBufLock")
    // Caller-supplied buffer that receives the reply from the peer; null when no reply is
    // expected.
    private ByteBuffer mReplyBuf = null;

   // Constructor: remembers the MessageQueue and the LmkdConnectionListener for later use.
   // No connection is opened here; see connect().
    LmkdConnection(MessageQueue msgQueue, LmkdConnectionListener listener) {
        this.mMsgQueue = msgQueue;
        this.mListener = listener;
    }

   // Connects to lmkd unless a connection is already open.
   // Returns true when a connection exists on return; false when the socket could not be
   // opened, its streams could not be obtained, or the listener's onConnect callback failed.
    public boolean connect() {
        synchronized (mLmkdSocketLock) {
            // Already connected: nothing to do.
            if (mLmkdSocket != null) {
                return true;
            }

            // Work on a temporary socket until the connection is fully validated.
            final LocalSocket candidate = openSocket();
            if (candidate == null) {
                Slog.w(TAG, "Failed to connect to lowmemorykiller, retry later");
                return false;
            }

            final OutputStream toLmkd;
            final InputStream fromLmkd;
            try {
                toLmkd = candidate.getOutputStream();
                fromLmkd = candidate.getInputStream();
            } catch (IOException ex) {
                IoUtils.closeQuietly(candidate);
                return false;
            }

            // Give the listener (if any) a chance to talk to lmkd; a false return aborts.
            final boolean handshakeOk = (mListener == null) || mListener.onConnect(toLmkd);
            if (!handshakeOk) {
                Slog.w(TAG, "Failed to communicate with lowmemorykiller, retry later");
                IoUtils.closeQuietly(candidate);
                return false;
            }

            // Connection established: publish the socket and its streams.
            mLmkdSocket = candidate;
            mLmkdOutputStream = toLmkd;
            mLmkdInputStream = fromLmkd;

            // Route input/error events on the socket's descriptor to fileDescriptorEventHandler.
            final FileDescriptor socketFd = mLmkdSocket.getFileDescriptor();
            final MessageQueue.OnFileDescriptorEventListener fdListener =
                    new MessageQueue.OnFileDescriptorEventListener() {
                        public int onFileDescriptorEvents(FileDescriptor fd, int events) {
                            return fileDescriptorEventHandler(fd, events);
                        }
                    };
            mMsgQueue.addOnFileDescriptorEventListener(
                    socketFd, EVENT_INPUT | EVENT_ERROR, fdListener);

            // Wake up any thread waiting on the socket lock.
            mLmkdSocketLock.notifyAll();
        }
        return true;
    }

frameworks/base/services/core/java/com/android/server/am/LmkdStatsReporter.java

    // Message sizes for the two lmkd stats packets.
    // NOTE(review): presumably in bytes and excluding variable-length parts — confirm against
    // the packet parsing code, which is not shown here.
    public static final int KILL_OCCURRED_MSG_SIZE = 80;
    public static final int STATE_CHANGED_MSG_SIZE = 8;

    // Kill reason codes. The names and numeric values match the non-negative entries of
    // enum kill_reasons in system/memory/lmkd/statslog.h and must be kept in sync with it.
    private static final int PRESSURE_AFTER_KILL = 0;
    private static final int NOT_RESPONDING = 1;
    private static final int LOW_SWAP_AND_THRASHING = 2;
    private static final int LOW_MEM_AND_SWAP = 3;
    private static final int LOW_MEM_AND_THRASHING = 4;
    private static final int DIRECT_RECL_AND_THRASHING = 5;
    private static final int LOW_MEM_AND_SWAP_UTIL = 6;
    private static final int LOW_FILECACHE_AFTER_THRASHING = 7;

system/memory/lmkd/statslog.h

/* Maximum length in bytes of the process name string carried in a reply packet */
#define MAX_TASKNAME_LEN 128

/*
 * Max LMKD reply packet length in bytes
 * Notes about size calculation:
 * 4 bytes for packet type
 * 88 bytes for the LmkKillOccurred fields: memory_stat + kill_stat
 * 2 bytes for process name string size
 * MAX_TASKNAME_LEN bytes for the process name string
 * (4 + 88 + 2 + 128 = 222)
 *
 * Must be in sync with LmkdConnection.java
 */
#define LMKD_REPLY_MAX_SIZE 222

/* LMK_MEMORY_STATS packet payload */
// Data record carried as the payload of an LMK_MEMORY_STATS packet: six 64-bit signed fields.
struct memory_stat {
    int64_t pgfault;
    int64_t pgmajfault;
    int64_t rss_in_bytes;
    int64_t cache_in_bytes;
    int64_t swap_in_bytes;
    int64_t process_start_time_ns;
};

// If you update this, also update the corresponding stats enum mapping and LmkdStatsReporter.java
// (the numeric values must stay identical to the constants in LmkdStatsReporter.java)
enum kill_reasons {
    NONE = -1, /* To denote no kill condition */
    PRESSURE_AFTER_KILL = 0,
    NOT_RESPONDING,
    LOW_SWAP_AND_THRASHING,
    LOW_MEM_AND_SWAP,
    LOW_MEM_AND_THRASHING,
    DIRECT_RECL_AND_THRASHING,
    LOW_MEM_AND_SWAP_UTIL,
    LOW_FILECACHE_AFTER_THRASHING,
    KILL_REASON_COUNT /* Number of real kill reasons (8); not a reason itself */
};

/**
 * Exchange a request/reply packets with lmkd
 *
 * <p>When {@code repl} is null the request is only written and no reply is awaited. Otherwise
 * {@code repl} is installed as the reply buffer, the request is written, and the calling
 * thread blocks on {@code mReplyBufLock} until it is notified.
 *
 * <p>If the waiting thread is interrupted, the method returns false and the thread's interrupt
 * status is restored so that callers can still observe the interruption.
 *
 * @param req The buffer holding the request data to be sent
 * @param repl The buffer to receive the reply
 * @return true if the request was written and, when a reply was requested, the reply buffer
 *         was still set after the wait finished; false otherwise
 */
public boolean exchange(ByteBuffer req, ByteBuffer repl) {
    if (repl == null) {
        // Fire-and-forget request: success is simply whether the write succeeded.
        return write(req);
    }

    boolean result = false;
    // set reply buffer to user-defined one to fill it
    synchronized (mReplyBufLock) {
        mReplyBuf = repl;
        try {
            if (write(req)) {
                // wait for the reply
                mReplyBufLock.wait();
                // NOTE(review): presumably the notifier clears mReplyBuf when no valid reply
                // arrived — confirm against the code that calls notify on mReplyBufLock.
                result = (mReplyBuf != null);
            }
        } catch (InterruptedException ie) {
            // Do not swallow the interruption: restore the flag for the caller.
            Thread.currentThread().interrupt();
            result = false;
        } finally {
            // reset reply buffer, even if write() failed with an unchecked exception
            mReplyBuf = null;
        }
    }
    return result;
}

/**
 * Writes a packet to the native lmkd side over the output stream.
 *
 * <p>The bytes sent are the buffer's backing array from index 0 up to the buffer's current
 * position. The write is performed while holding {@code mLmkdSocketLock}.
 *
 * @param buf buffer holding the packet to send
 * @return true if the write completed, false if it failed with an IOException
 */
private boolean write(ByteBuffer buf) {
    synchronized (mLmkdSocketLock) {
        boolean sent = true;
        try {
            final byte[] payload = buf.array();
            final int length = buf.position();
            mLmkdOutputStream.write(payload, 0, length);
        } catch (IOException ex) {
            sent = false;
        }
        return sent;
    }
}

/**
 * Reads data coming from the native lmkd side into the buffer's backing array.
 *
 * <p>Up to the full length of the backing array is read, starting at index 0. The read is
 * performed while holding {@code mLmkdSocketLock}.
 *
 * @param buf buffer whose backing array receives the data
 * @return the value returned by the underlying stream read, or -1 if it threw an IOException
 */
private int read(ByteBuffer buf) {
    synchronized (mLmkdSocketLock) {
        final byte[] target = buf.array();
        int count;
        try {
            count = mLmkdInputStream.read(target, 0, target.length);
        } catch (IOException ex) {
            // The failure is reported to the caller only through the -1 result.
            count = -1;
        }
        return count;
    }
}

androidu/system/memory/lmkd/lmkd.cpp

做事情:

1. int init(void) 做初始化动作.

2. mp_event_psi 处理内存压力(PSI memory)事件，并根据水位、swap、thrashing 等状态决定是否需要杀进程.

/*
* Write the state_changed over the data socket to be propagated via AMS to statsd
*/

//将state_changed 状态通过socket传给AMS
static void stats_write_lmk_state_changed(enum lmk_state state) {
ALOGE("stats_write_lmk_state_changed");
LMKD_CTRL_PACKET packet_state_changed;
const size_t len = lmkd_pack_set_state_changed(packet_state_changed, state);
if (len == 0) {
return;
}
for (int i = 0; i < MAX_DATA_CONN; i++) {
if (data_sock[i].sock >= 0 && data_sock[i].async_event_mask & 1 << LMK_ASYNC_EVENT_STAT) {
ctrl_data_write(i, (char*)packet_state_changed, len);
}
}
}

// PSI event handler: runs on a PSI event (events != 0) or on a polling wakeup (events == 0),
// decides whether a process has to be killed and why, and updates the polling parameters.
// `data` carries the reported pressure level (cast to enum vmpressure_level).

static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_params) {
    enum reclaim_state {
        NO_RECLAIM = 0,
        KSWAPD_RECLAIM,
        DIRECT_RECLAIM,
    };
    /* State that persists across invocations */
    static int64_t init_ws_refault;
    static int64_t prev_workingset_refault;
    static int64_t base_file_lru;
    static int64_t init_pgscan_kswapd;
    static int64_t init_pgscan_direct;
    static bool killing;
    static int thrashing_limit = thrashing_limit_pct;
    static struct zone_watermarks watermarks;
    static struct timespec wmark_update_tm;
    static struct wakeup_info wi;
    static struct timespec thrashing_reset_tm;
    static int64_t prev_thrash_growth = 0;
    static bool check_filecache = false;
    static int max_thrashing = 0;

    union meminfo mi;
    union vmstat vs;
    struct psi_data psi_data;
    struct timespec curr_tm;
    int64_t thrashing = 0;
    bool swap_is_low = false;
    // Pressure level reported with this event
    enum vmpressure_level level = (enum vmpressure_level)data;

    enum kill_reasons kill_reason = NONE;
    bool cycle_after_kill = false;
    enum reclaim_state reclaim = NO_RECLAIM;
    enum zone_watermark wmark = WMARK_NONE;
    char kill_desc[LINE_MAX];
    bool cut_thrashing_limit = false;
    int min_score_adj = 0;
    int swap_util = 0;
    int64_t swap_low_threshold;
    long since_thrashing_reset_ms;
    int64_t workingset_refault_file;
    bool critical_stall = false;
    // Get the current time
    if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
        ALOGE("Failed to get current time");
        return;
    }
    // Record this wakeup; a non-zero events value means a real event, zero means polling
    record_wakeup_time(&curr_tm, events ? Event : Polling, &wi);
    // Is a previously started kill still pending?
    bool kill_pending = is_kill_pending();
    // If a kill is still pending and either no kill timeout is configured or the timeout has
    // not yet elapsed since the last kill, skip this wakeup and do not kill anything now.
    if (kill_pending && (kill_timeout_ms == 0 ||
        get_time_diff_ms(&last_kill_tm, &curr_tm) < static_cast<long>(kill_timeout_ms))) {
        /* Skip while still killing a process */
        wi.skipped_wakeups++;
        goto no_kill;
    }
    /*
     * Process is dead or kill timeout is over, stop waiting. This has no effect if pidfds are
     * supported and death notification already caused waiting to stop.
     */
    stop_wait_for_proc_kill(!kill_pending);

    if (vmstat_parse(&vs) < 0) {
        // vmstat information could not be obtained
        ALOGE("Failed to parse vmstat!");
        return;
    }
    /* Starting 5.9 kernel workingset_refault vmstat field was renamed workingset_refault_file */
    workingset_refault_file = vs.field.workingset_refault ? : vs.field.workingset_refault_file;

    if (meminfo_parse(&mi) < 0) {
        // meminfo information could not be obtained
        ALOGE("Failed to parse meminfo!");
        return;
    }

    /* Reset states after process got killed */
    if (killing) {
        killing = false;
        cycle_after_kill = true;
        /* Reset file-backed pagecache size and refault amounts after a kill */
        base_file_lru = vs.field.nr_inactive_file + vs.field.nr_active_file;
        init_ws_refault = workingset_refault_file;
        thrashing_reset_tm = curr_tm;
        prev_thrash_growth = 0;
    }

    /* Check free swap levels */
    if (swap_free_low_percentage) {
        swap_low_threshold = mi.field.total_swap * swap_free_low_percentage / 100;
        swap_is_low = mi.field.free_swap < swap_low_threshold;
    } else {
        swap_low_threshold = 0;
    }

    /* Identify reclaim state */
    if (vs.field.pgscan_direct > init_pgscan_direct) {
        init_pgscan_direct = vs.field.pgscan_direct;
        init_pgscan_kswapd = vs.field.pgscan_kswapd;
        reclaim = DIRECT_RECLAIM;
    } else if (vs.field.pgscan_kswapd > init_pgscan_kswapd) {
        init_pgscan_kswapd = vs.field.pgscan_kswapd;
        reclaim = KSWAPD_RECLAIM;
    } else if (workingset_refault_file == prev_workingset_refault) {
        /*
         * Device is not thrashing and not reclaiming, bail out early until we see these stats
         * changing
         */
        goto no_kill;
    }

    prev_workingset_refault = workingset_refault_file;

     /*
     * It's possible we fail to find an eligible process to kill (ex. no process is
     * above oom_adj_min). When this happens, we should retry to find a new process
     * for a kill whenever a new eligible process is available. This is especially
     * important for a slow growing refault case. While retrying, we should keep
     * monitoring new thrashing counter as someone could release the memory to mitigate
     * the thrashing. Thus, when thrashing reset window comes, we decay the prev thrashing
     * counter by window counts. If the counter is still greater than thrashing limit,
     * we preserve the current prev_thrash counter so we will retry kill again. Otherwise,
     * we reset the prev_thrash counter so we will stop retrying.
     */
    since_thrashing_reset_ms = get_time_diff_ms(&thrashing_reset_tm, &curr_tm);
    // More than THRASHING_RESET_INTERVAL_MS has passed since the last thrashing reset
    if (since_thrashing_reset_ms > THRASHING_RESET_INTERVAL_MS) {
        long windows_passed;
        /* Calculate prev_thrash_growth if we crossed THRASHING_RESET_INTERVAL_MS */
        prev_thrash_growth = (workingset_refault_file - init_ws_refault) * 100
                            / (base_file_lru + 1);
        windows_passed = (since_thrashing_reset_ms / THRASHING_RESET_INTERVAL_MS);
        /*
         * Decay prev_thrashing unless over-the-limit thrashing was registered in the window we
         * just crossed, which means there were no eligible processes to kill. We preserve the
         * counter in that case to ensure a kill if a new eligible process appears.
         */
        if (windows_passed > 1 || prev_thrash_growth < thrashing_limit) {
            prev_thrash_growth >>= windows_passed;
        }

        /* Record file-backed pagecache size when crossing THRASHING_RESET_INTERVAL_MS */
        base_file_lru = vs.field.nr_inactive_file + vs.field.nr_active_file;
        init_ws_refault = workingset_refault_file;
        thrashing_reset_tm = curr_tm;
        thrashing_limit = thrashing_limit_pct;
    } else {
        /* Calculate what % of the file-backed pagecache refaulted so far */
        thrashing = (workingset_refault_file - init_ws_refault) * 100 / (base_file_lru + 1);
    }
    /* Add previous cycle's decayed thrashing amount */
    thrashing += prev_thrash_growth;
    if (max_thrashing < thrashing) {
        max_thrashing = thrashing;
    }

    /*
     * Refresh watermarks once per min in case user updated one of the margins.
     * TODO: b/140521024 replace this periodic update with an API for AMS to notify LMKD
     * that zone watermarks were changed by the system software.
     */
    if (watermarks.high_wmark == 0 || get_time_diff_ms(&wmark_update_tm, &curr_tm) > 60000) {
        struct zoneinfo zi;

        if (zoneinfo_parse(&zi) < 0) {
            ALOGE("Failed to parse zoneinfo!");
            return;
        }

        calc_zone_watermarks(&zi, &watermarks);
        wmark_update_tm = curr_tm;
    }

    /* Find out which watermark is breached if any */
    wmark = get_lowest_watermark(&mi, &watermarks);

    if (!psi_parse_mem(&psi_data)) {
        // Critical stall: the PSI "full" avg10 memory value exceeds stall_limit_critical
        critical_stall = psi_data.mem_stats[PSI_FULL].avg10 > (float)stall_limit_critical;
    }
    /*
     * TODO: move this logic into a separate function
     * Decide if killing a process is necessary and record the reason
     */
    if (cycle_after_kill && wmark < WMARK_LOW) {
        /*
         * Prevent kills not freeing enough memory which might lead to OOM kill.
         * This might happen when a process is consuming memory faster than reclaim can
         * free even after a kill. Mostly happens when running memory stress tests.
         */
        kill_reason = PRESSURE_AFTER_KILL;
        // Human-readable description of the kill reason
        strncpy(kill_desc, "min watermark is breached even after kill", sizeof(kill_desc));
    } else if (level == VMPRESS_LEVEL_CRITICAL && events != 0) {
        /*
         * Device is too busy reclaiming memory which might lead to ANR.
         * Critical level is triggered when PSI complete stall (all tasks are blocked because
         * of the memory congestion) breaches the configured threshold.
         */
        kill_reason = NOT_RESPONDING;
        strncpy(kill_desc, "device is not responding", sizeof(kill_desc));
    } else if (swap_is_low && thrashing > thrashing_limit_pct) {
        /* Page cache is thrashing while swap is low */
        kill_reason = LOW_SWAP_AND_THRASHING;
        snprintf(kill_desc, sizeof(kill_desc), "device is low on swap (%" PRId64
            "kB < %" PRId64 "kB) and thrashing (%" PRId64 "%%)",
            mi.field.free_swap * page_k, swap_low_threshold * page_k, thrashing);
        /* Do not kill perceptible apps unless below min watermark or heavily thrashing */
        if (wmark > WMARK_MIN && thrashing < thrashing_critical_pct) {
            min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
        }
        check_filecache = true;
    } else if (swap_is_low && wmark < WMARK_HIGH) {
        /* Both free memory and swap are low */
        kill_reason = LOW_MEM_AND_SWAP;
        snprintf(kill_desc, sizeof(kill_desc), "%s watermark is breached and swap is low (%"
            PRId64 "kB < %" PRId64 "kB)", wmark < WMARK_LOW ? "min" : "low",
            mi.field.free_swap * page_k, swap_low_threshold * page_k);
        /* Do not kill perceptible apps unless below min watermark or heavily thrashing */
        if (wmark > WMARK_MIN && thrashing < thrashing_critical_pct) {
            min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
        }
    } else if (wmark < WMARK_HIGH && swap_util_max < 100 &&
               (swap_util = calc_swap_utilization(&mi)) > swap_util_max) {
        /*
         * Too much anon memory is swapped out but swap is not low.
         * Non-swappable allocations created memory pressure.
         */
        kill_reason = LOW_MEM_AND_SWAP_UTIL;
        snprintf(kill_desc, sizeof(kill_desc), "%s watermark is breached and swap utilization"
            " is high (%d%% > %d%%)", wmark < WMARK_LOW ? "min" : "low",
            swap_util, swap_util_max);
    } else if (wmark < WMARK_HIGH && thrashing > thrashing_limit) {
        /* Page cache is thrashing while memory is low */
        kill_reason = LOW_MEM_AND_THRASHING;
        snprintf(kill_desc, sizeof(kill_desc), "%s watermark is breached and thrashing (%"
            PRId64 "%%)", wmark < WMARK_LOW ? "min" : "low", thrashing);
        cut_thrashing_limit = true;
        /* Do not kill perceptible apps unless thrashing at critical levels */
        if (thrashing < thrashing_critical_pct) {
            min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
        }
        check_filecache = true;
    } else if (reclaim == DIRECT_RECLAIM && thrashing > thrashing_limit) {
        /* Page cache is thrashing while in direct reclaim (mostly happens on lowram devices) */
        kill_reason = DIRECT_RECL_AND_THRASHING;
        snprintf(kill_desc, sizeof(kill_desc), "device is in direct reclaim and thrashing (%"
            PRId64 "%%)", thrashing);
        cut_thrashing_limit = true;
        /* Do not kill perceptible apps unless thrashing at critical levels */
        if (thrashing < thrashing_critical_pct) {
            min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
        }
        check_filecache = true;
    } else if (check_filecache) {
        int64_t file_lru_kb = (vs.field.nr_inactive_file + vs.field.nr_active_file) * page_k;

        if (file_lru_kb < filecache_min_kb) {
            /* File cache is too low after thrashing, keep killing background processes */
            kill_reason = LOW_FILECACHE_AFTER_THRASHING;
            snprintf(kill_desc, sizeof(kill_desc),
                "filecache is low (%" PRId64 "kB < %" PRId64 "kB) after thrashing",
                file_lru_kb, filecache_min_kb);
            min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
        } else {
            /* File cache is big enough, stop checking */
            check_filecache = false;
        }
    }

    /* Kill a process if necessary */
    if (kill_reason != NONE) {
        struct kill_info ki = {
            .kill_reason = kill_reason,
            .kill_desc = kill_desc,
            .thrashing = (int)thrashing,
            .max_thrashing = max_thrashing,
        };

        /* Allow killing perceptible apps if the system is stalled */
        if (critical_stall) {
            min_score_adj = 0;
        }
        // Parse PSI io data
        psi_parse_io(&psi_data);
        // Parse PSI cpu data
        psi_parse_cpu(&psi_data);
        int pages_freed = find_and_kill_process(min_score_adj, &ki, &mi, &wi, &curr_tm, &psi_data);
        if (pages_freed > 0) {
            killing = true;
            max_thrashing = 0;
            if (cut_thrashing_limit) {
                /*
                 * Cut thrasing limit by thrashing_limit_decay_pct percentage of the current
                 * thrashing limit until the system stops thrashing.
                 */
                thrashing_limit = (thrashing_limit * (100 - thrashing_limit_decay_pct)) / 100;
            }
        }
    }

no_kill:
    /* Do not poll if kernel supports pidfd waiting */
    if (is_waiting_for_kill()) {
        /* Pause polling if we are waiting for process death notification */
        poll_params->update = POLLING_PAUSE;
        return;
    }

    /*
     * Start polling after initial PSI event;
     * extend polling while device is in direct reclaim or process is being killed;
     * do not extend when kswapd reclaims because that might go on for a long time
     * without causing memory pressure
     */
    if (events || killing || reclaim == DIRECT_RECLAIM) {
        poll_params->update = POLLING_START;
    }

    /* Decide the polling interval */
    if (swap_is_low || killing) {
        /* Fast polling during and after a kill or when swap is low */
        poll_params->polling_interval_ms = PSI_POLL_PERIOD_SHORT_MS;
    } else {
        /* By default use long intervals */
        poll_params->polling_interval_ms = PSI_POLL_PERIOD_LONG_MS;
    }
}

/*
 * Look up the path of the given cgroup attribute.
 * Returns the path filled in by CgroupGetAttributePath; when the lookup fails an error is
 * logged and whatever the path string holds at that point is returned.
 */
static std::string GetCgroupAttributePath(const char* attr) {
    std::string resolved;
    const bool found = CgroupGetAttributePath(attr, &resolved);
    if (!found) {
        ALOGE("Unknown cgroup attribute %s", attr);
    }
    return resolved;
}

/*
 * Find one process to kill at or above the given oom_score_adj level.
 * Returns size of the killed process.
 */
static int find_and_kill_process(int min_score_adj, struct kill_info *ki, union meminfo *mi,
                                 struct wakeup_info *wi, struct timespec *tm,
                                 struct psi_data *pd) {
    int i;
    int killed_size = 0;
    bool lmk_state_change_start = false;
    bool choose_heaviest_task = kill_heaviest_task;

    for (i = OOM_SCORE_ADJ_MAX; i >= min_score_adj; i--) {
        struct proc *procp;

        if (!choose_heaviest_task && i <= PERCEPTIBLE_APP_ADJ) {
            /*
             * If we have to choose a perceptible process, choose the heaviest one to
             * hopefully minimize the number of victims.
             */
            choose_heaviest_task = true;
        }

        while (true) {
            procp = choose_heaviest_task ?
                proc_get_heaviest(i) : proc_adj_tail(i);

            if (!procp)
                break;

            killed_size = kill_one_process(procp, min_score_adj, ki, mi, wi, tm, pd);
            if (killed_size >= 0) {
                if (!lmk_state_change_start) {
                    lmk_state_change_start = true;
                    stats_write_lmk_state_changed(STATE_START);
                }
                break;
            }
        }
        if (killed_size) {
            break;
        }
    }

    if (lmk_state_change_start) {
        stats_write_lmk_state_changed(STATE_STOP);
    }

    return killed_size;
}

/*
 * Record a wakeup in the wakeup_info bookkeeping.
 * The previous wakeup time is shifted into prev_wakeup_tm and *tm becomes the current one.
 * A wakeup caused by an Event also becomes the last event time and resets both the
 * wakeups-since-event and skipped-wakeups counters; any other reason only increments
 * wakeups_since_event.
 */
static void record_wakeup_time(struct timespec *tm, enum wakeup_reason reason,
                               struct wakeup_info *wi) {
    wi->prev_wakeup_tm = wi->wakeup_tm;
    wi->wakeup_tm = *tm;

    if (reason != Event) {
        wi->wakeups_since_event++;
        return;
    }

    wi->last_event_tm = *tm;
    wi->wakeups_since_event = 0;
    wi->skipped_wakeups = 0;
}

/*
 * Main event loop: waits on epollfd, runs the periodic poll handler when one is active,
 * handles the kill-wait timeout, and dispatches epoll events to their handlers.
 * Never returns.
 */
static void mainloop(void) {
    struct event_handler_info* handler_info;
    struct polling_params poll_params;
    struct timespec curr_tm;
    struct epoll_event *evt;
    long delay = -1;

    poll_params.poll_handler = NULL;
    poll_params.paused_handler = NULL;
    // Loop forever
    while (1) {
        struct epoll_event events[MAX_EPOLL_EVENTS];
        int nevents;
        int i;
        // A poll handler is active: we are in polling mode
        if (poll_params.poll_handler) {
            bool poll_now;

            clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm);
            if (poll_params.update == POLLING_RESUME) {
                /* Just transitioned into POLLING_RESUME, poll immediately. */
                poll_now = true;
                nevents = 0;
            } else {
                /* Calculate next timeout */
                // Time elapsed between last_poll_tm and curr_tm
                delay = get_time_diff_ms(&poll_params.last_poll_tm, &curr_tm);
                // If less than one polling interval has elapsed, wait only for the remainder
                // of the interval; otherwise wait a full polling_interval_ms.
                delay = (delay < poll_params.polling_interval_ms) ?
                    poll_params.polling_interval_ms - delay : poll_params.polling_interval_ms;

                /* Wait for events until the next polling timeout */
                nevents = epoll_wait(epollfd, events, maxevents, delay);

                /* Update current time after wait */
                clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm);
                // Poll only if at least polling_interval_ms has elapsed since the last poll
                poll_now = (get_time_diff_ms(&poll_params.last_poll_tm, &curr_tm) >=
                    poll_params.polling_interval_ms);
            }
            if (poll_now) {
                // Invoke the poll handler with events == 0, i.e. as a polling wakeup
                call_handler(poll_params.poll_handler, &poll_params, 0);
            }
        } else {
            // No poll handler is active. If a kill timeout is configured and we are still
            // waiting for a kill to complete, wait only for the rest of the timeout.
            if (kill_timeout_ms && is_waiting_for_kill()) {
                // Get the current time
                clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm);
                // Remaining part of the kill timeout, measured from last_kill_tm
                delay = kill_timeout_ms - get_time_diff_ms(&last_kill_tm, &curr_tm);
                /* Wait for pidfds notification or kill timeout to expire */
                nevents = (delay > 0) ? epoll_wait(epollfd, events, maxevents, delay) : 0;
                // No events: the wait timed out (or the timeout had already expired)
                if (nevents == 0) {
                    /* Kill notification timed out */
                    stop_wait_for_proc_kill(false);
                    if (polling_paused(&poll_params)) {
                        clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm);
                        poll_params.update = POLLING_RESUME;
                        resume_polling(&poll_params, curr_tm);
                    }
                }
            } else {
                /* Wait for events with no timeout */
                nevents = epoll_wait(epollfd, events, maxevents, -1);
            }
        }

        if (nevents == -1) {
            // EINTR: the wait was interrupted by a signal, simply retry
            if (errno == EINTR)
                continue;
            ALOGE("epoll_wait failed (errno=%d)", errno);
            continue;
        }

        /*
         * First pass to see if any data socket connections were dropped.
         * Dropped connection should be handled before any other events
         * to deallocate data connection and correctly handle cases when
         * connection gets dropped and reestablished in the same epoll cycle.
         * In such cases it's essential to handle connection closures first.
         */
        for (i = 0, evt = &events[0]; i < nevents; ++i, evt++) {
            // EPOLLHUP: the descriptor was hung up; only handled when handler info is attached
            if ((evt->events & EPOLLHUP) && evt->data.ptr) {
                ALOGI("lmkd data connection dropped");
                handler_info = (struct event_handler_info*)evt->data.ptr;
                watchdog.start();
                // Close the dropped data connection
                ctrl_data_close(handler_info->data);
                watchdog.stop();
            }
        }

        /* Second pass to handle all other events */
        for (i = 0, evt = &events[0]; i < nevents; ++i, evt++) {
            // EPOLLERR: an error occurred on the descriptor; it is only logged here
            if (evt->events & EPOLLERR) {
                ALOGD("EPOLLERR on event #%d", i);
            }
            // EPOLLHUP: the descriptor was hung up
            if (evt->events & EPOLLHUP) {
                /* This case was handled in the first pass */
                continue;
            }
            // Dispatch to the handler attached to this event, if any
            if (evt->data.ptr) {
                handler_info = (struct event_handler_info*)evt->data.ptr;
                call_handler(handler_info, &poll_params, evt->events);
            }
        }
    }
}

// 处理事件
static void call_handler(struct event_handler_info* handler_info,
                         struct polling_params *poll_params, uint32_t events) {
    struct timespec curr_tm;
    //开启watchdog
    watchdog.start();
    //更新状态,初始状态设置成 POLLING_DO_NOT_CHANGE
    poll_params->update = POLLING_DO_NOT_CHANGE;
    //更新poll_params->update状态
    handler_info->handler(handler_info->data, events, poll_params);

    clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm);
    if (poll_params->poll_handler == handler_info) {
        poll_params->last_poll_tm = curr_tm;
    }
   //获取到poll_params->update 状态值
    switch (poll_params->update) {
    case POLLING_START:
        /*
         * Poll for the duration of PSI_WINDOW_SIZE_MS after the
         * initial PSI event because psi events are rate-limited
         * at one per sec.
         */
        poll_params->poll_start_tm = curr_tm;
        poll_params->poll_handler = handler_info;
        break;
    case POLLING_PAUSE:
        poll_params->paused_handler = handler_info;
        poll_params->poll_handler = NULL;
        break;
    case POLLING_RESUME:
        resume_polling(poll_params, curr_tm);
        break;
    case POLLING_DO_NOT_CHANGE:
        if (poll_params->poll_handler &&
            get_time_diff_ms(&poll_params->poll_start_tm, &curr_tm) > PSI_WINDOW_SIZE_MS) {
            /* Polled for the duration of PSI window, time to stop */
            poll_params->poll_handler = NULL;
        }
        break;
    }
    watchdog.stop();
}

/*
 * Stop waiting for the last killed process.
 * Does nothing when last_kill_pid_or_fd is negative, i.e. there is nothing to wait for.
 * With pidfd support the descriptor is removed from the epoll set and closed. In all other
 * cases only last_kill_pid_or_fd is reset to -1.
 * `finished` only selects which debug message is logged.
 */
static void stop_wait_for_proc_kill(bool finished) {
    struct epoll_event epev;

    if (last_kill_pid_or_fd < 0) {
        return;
    }

    if (debug_process_killing) {
        struct timespec now;

        if (clock_gettime(CLOCK_MONOTONIC_COARSE, &now) != 0) {
            /*
             * The time is used here merely to report kill duration, so this failure is not
             * fatal. Log an error and continue.
             */
            ALOGE("Failed to get current time");
        }

        const long waited_ms = get_time_diff_ms(&last_kill_tm, &now);
        if (finished) {
            ALOGI("Process got killed in %ldms", waited_ms);
        } else {
            ALOGI("Stop waiting for process kill after %ldms", waited_ms);
        }
    }

    if (pidfd_supported) {
        /* unregister fd */
        if (epoll_ctl(epollfd, EPOLL_CTL_DEL, last_kill_pid_or_fd, &epev) != 0) {
            // Log an error and keep going
            ALOGE("epoll_ctl for last killed process failed; errno=%d", errno);
        }
        maxevents--;
        close(last_kill_pid_or_fd);
    }

    last_kill_pid_or_fd = -1;
}

/* Report whether polling is paused, i.e. whether a paused handler is currently recorded. */
static bool polling_paused(struct polling_params *poll_params) {
    if (poll_params->paused_handler == NULL) {
        return false;
    }
    return true;
}

/*
 * Resume polling after a pause: the paused handler becomes the active poll handler again,
 * polling restarts from curr_tm with the short polling interval, and the paused slot is
 * cleared.
 */
static void resume_polling(struct polling_params *poll_params, struct timespec curr_tm) {
    poll_params->polling_interval_ms = PSI_POLL_PERIOD_SHORT_MS;
    poll_params->poll_start_tm = curr_tm;

    /* Hand the paused handler back before clearing the paused slot */
    poll_params->poll_handler = poll_params->paused_handler;
    poll_params->paused_handler = NULL;
}

/*
 * One-time lmkd initialization.
 *
 * In order: computes page_k (page size in kB), creates the global epoll instance,
 * marks all data connections as unconnected, obtains and listens on the "lmkd" control
 * socket and registers it with epoll, selects between the in-kernel lowmemorykiller
 * interface and the userspace monitors, initializes the per-adj process lists and the
 * kill-count index table, pre-reads the zoneinfo file to size the read buffer, probes
 * for pidfd_open support, and finally runs init_mbrain() and lmkd_init_hook().
 *
 * Returns 0 on success and -1 on any fatal initialization failure.
 */

static int init(void) {
    static struct event_handler_info kernel_poll_hinfo = { 0, kernel_event_handler };

    /* File read once below to size the shared read buffer (presumably /proc/zoneinfo). */
    struct reread_data file_data = {
        .filename = ZONEINFO_PATH,
        .fd = -1,
    };
    struct epoll_event epev;
    int pidfd;
    int i;
    int ret;

    /* Page size in kB; fall back to PAGE_SIZE when sysconf() fails. */
    page_k = sysconf(_SC_PAGESIZE);
    if (page_k == -1)
        page_k = PAGE_SIZE;
    page_k /= 1024;
    /* Create the global epoll file descriptor. */
    epollfd = epoll_create(MAX_EPOLL_EVENTS);
    if (epollfd == -1) {
        ALOGE("epoll_create failed (errno=%d)", errno);
        return -1;
    }

    // mark data connections as not connected
    for (int i = 0; i < MAX_DATA_CONN; i++) {
        data_sock[i].sock = -1;
    }
    /* Obtain the "lmkd" control socket file descriptor. */
    ctrl_sock.sock = android_get_control_socket("lmkd");
    if (ctrl_sock.sock < 0) {
        ALOGE("get lmkd control socket failed");
        return -1;
    }

    ret = listen(ctrl_sock.sock, MAX_DATA_CONN);
    if (ret < 0) {
        ALOGE("lmkd control socket listen failed (errno=%d)", errno);
        return -1;
    }

    /* Incoming connections on the control socket are dispatched to ctrl_connect_handler. */
    epev.events = EPOLLIN;
    ctrl_sock.handler_info.handler = ctrl_connect_handler;
    epev.data.ptr = (void *)&(ctrl_sock.handler_info);

    if (epoll_ctl(epollfd, EPOLL_CTL_ADD, ctrl_sock.sock, &epev) == -1) {
        ALOGE("epoll_ctl for lmkd control socket failed (errno=%d)", errno);
        return -1;
    }
    maxevents++;

    has_inkernel_module = !access(INKERNEL_MINFREE_PATH, W_OK);
    use_inkernel_interface = has_inkernel_module;
    /*
     * The in-kernel interface is used when INKERNEL_MINFREE_PATH IS writable
     * (access() returned 0); otherwise the userspace monitors are initialized.
     */
    if (use_inkernel_interface) {
        ALOGI("Using in-kernel low memory killer interface");
        if (init_poll_kernel()) {
            epev.events = EPOLLIN;
            epev.data.ptr = (void*)&kernel_poll_hinfo;
            if (epoll_ctl(epollfd, EPOLL_CTL_ADD, kpoll_fd, &epev) != 0) {
                ALOGE("epoll_ctl for lmk events failed (errno=%d)", errno);
                close(kpoll_fd);
                kpoll_fd = -1;
            } else {
                maxevents++;
                /* let the others know it does support reporting kills */
                property_set("sys.lmk.reportkills", "1");
            }
        }
    } else {
        /* Set up the userspace monitors; failure here is fatal. */
        if (!init_monitors()) {
            return -1;
        }
        /* let the others know it does support reporting kills */
        property_set("sys.lmk.reportkills", "1");
    }

    /* Make every per-adj slot an empty circular list that points at itself. */
    for (i = 0; i <= ADJTOSLOT(OOM_SCORE_ADJ_MAX); i++) {
        procadjslot_list[i].next = &procadjslot_list[i];
        procadjslot_list[i].prev = &procadjslot_list[i];
    }

    memset(killcnt_idx, KILLCNT_INVALID_IDX, sizeof(killcnt_idx));

    /*
     * Read zoneinfo as the biggest file we read to create and size the initial
     * read buffer and avoid memory re-allocations during memory pressure
     */
    if (reread_file(&file_data) == NULL) {
        ALOGE("Failed to read %s: %s", file_data.filename, strerror(errno));
    }

    /* check if kernel supports pidfd_open syscall */
    pidfd = TEMP_FAILURE_RETRY(pidfd_open(getpid(), 0));
    if (pidfd < 0) {
        /* Only ENOSYS is treated as "not supported"; any other error still counts as supported. */
        pidfd_supported = (errno != ENOSYS);
    } else {
        pidfd_supported = true;
        close(pidfd);
    }
    ALOGI("Process polling is %s", pidfd_supported ? "supported" : "not supported" );

    /* The init_mbrain() result is only logged; it does not affect the return value. */
    bool mbrain_init_success = init_mbrain();
    ALOGI("Mbrain trigger success: %s", mbrain_init_success ? "success" : "fail" );

    if (!lmkd_init_hook()) {
        ALOGE("Failed to initialize LMKD hooks.");
        return -1;
    }

    return 0;
}

androidu/frameworks/base/services/core/java/com/android/server/am/ProcessList.java

做事:

1. writeLmkd

/**
 * Writes a request to lmkd and, via {@code LmkdConnection.exchange}, collects the reply.
 *
 * <p>When the connection is down, a reconnect message is posted to the kill handler and the
 * call waits for up to three reconnect delays before giving up.
 *
 * @param buf request buffer handed to {@code exchange}
 * @param repl reply buffer handed to {@code exchange}
 * @return {@code false} if the connection could not be established in time, otherwise the
 *         result of {@code exchange}
 */
private static boolean writeLmkd(ByteBuffer buf, ByteBuffer repl) {
    final boolean alreadyConnected = sLmkdConnection.isConnected();
    if (!alreadyConnected) {
        // try to connect immediately and then keep retrying
        sKillHandler.sendMessage(
                sKillHandler.obtainMessage(KillHandler.LMKD_RECONNECT_MSG));

        // wait for connection retrying 3 times (up to 3 seconds)
        final boolean connectedInTime =
                sLmkdConnection.waitForConnection(3 * LMKD_RECONNECT_DELAY_MS);
        if (!connectedInTime) {
            return false;
        }
    }
    // Hand the request/reply round trip over to LmkdConnection.exchange().
    return sLmkdConnection.exchange(buf, repl);
}

/**
 * Handle the unsolicited message from zygote.
 *
 * <p>On an input event, reads the message from {@code fd}; when it parses as a complete
 * SIGCHLD record, forwards pid, uid and status to the app-exit-info tracker. Any exception
 * raised while reading or parsing is logged and otherwise ignored.
 *
 * @param fd descriptor the zygote message is read from
 * @param events event bit mask delivered for {@code fd}
 * @return always {@code EVENT_INPUT}
 */
private int handleZygoteMessages(FileDescriptor fd, int events) {
    // Not used below; retained so the call on fd still happens exactly as before.
    final int eventFd = fd.getInt$();

    final boolean hasInput = (events & EVENT_INPUT) != 0;
    if (!hasInput) {
        return EVENT_INPUT;
    }

    // An incoming message from zygote
    try {
        final int len = Os.read(fd, mZygoteUnsolicitedMessage, 0,
                mZygoteUnsolicitedMessage.length);
        if (len > 0) {
            final int expectedFields = mZygoteSigChldMessage.length;
            final int parsedFields = Zygote.nativeParseSigChld(
                    mZygoteUnsolicitedMessage, len, mZygoteSigChldMessage);
            if (expectedFields == parsedFields) {
                mAppExitInfoTracker.handleZygoteSigChld(
                        mZygoteSigChldMessage[0] /* pid */,
                        mZygoteSigChldMessage[1] /* uid */,
                        mZygoteSigChldMessage[2] /* status */);
            }
        }
    } catch (Exception e) {
        Slog.w(TAG, "Exception in reading unsolicited zygote message: " + e);
    }
    return EVENT_INPUT;
}

system/memory/lmkd/libpsi/psi.cpp

做事:

1. init_psi_monitor　初始化psi 监控.

2. main 入口函数、watchdog 初始化（注意：main 位于 system/memory/lmkd/lmkd.cpp，而不是 psi.cpp）。

3. mainloop 函数（同样位于 lmkd.cpp）。

#define PSI_MON_FILE_MEMORY "/proc/pressure/memory"

/*
 * Open the PSI memory monitor file and write a trigger description to it.
 *
 * The trigger text has the form "<stall type name> <threshold_us> <window_us>" and is
 * written including its terminating NUL byte.
 *
 * Returns the open file descriptor on success. Returns -1 on failure; for an invalid
 * stall type or an over-long trigger line, errno is set to EINVAL before the fd is closed.
 */
int init_psi_monitor(enum psi_stall_type stall_type,
             int threshold_us, int window_us) {
    char trigger[256];
    int psi_fd;
    int len;
    int written;

    /* Open the PSI memory pressure file for writing. */
    psi_fd = TEMP_FAILURE_RETRY(open(PSI_MON_FILE_MEMORY, O_WRONLY | O_CLOEXEC));
    if (psi_fd < 0) {
        ALOGE("No kernel psi monitor support (errno=%d)", errno);
        return -1;
    }

    /* Only the "some" and "full" stall types can be monitored. */
    if (stall_type != PSI_SOME && stall_type != PSI_FULL) {
        ALOGE("Invalid psi stall type: %d", stall_type);
        errno = EINVAL;
        close(psi_fd);
        return -1;
    }

    len = snprintf(trigger, sizeof(trigger), "%s %d %d",
        stall_type_name[stall_type], threshold_us, window_us);

    /* The formatted trigger did not fit into the buffer. */
    if (len >= (ssize_t)sizeof(trigger)) {
        ALOGE("%s line overflow for psi stall type '%s'",
            PSI_MON_FILE_MEMORY, stall_type_name[stall_type]);
        errno = EINVAL;
        close(psi_fd);
        return -1;
    }

    written = TEMP_FAILURE_RETRY(write(psi_fd, trigger, strlen(trigger) + 1));
    if (written < 0) {
        ALOGE("%s write failed for psi stall type '%s'; errno=%d",
            PSI_MON_FILE_MEMORY, stall_type_name[stall_type], errno);
        close(psi_fd);
        return -1;
    }

    return psi_fd;
}

/*
 * Add a PSI monitor fd to an epoll instance, watching for EPOLLPRI events.
 *
 * epollfd: epoll instance to register with
 * fd:      PSI monitor file descriptor
 * data:    opaque pointer stored in the event and handed back when it fires
 *
 * Returns the epoll_ctl() result; a negative result is also logged.
 */
int register_psi_monitor(int epollfd, int fd, void* data) {
    struct epoll_event event;

    event.events = EPOLLPRI;
    event.data.ptr = data;

    /* Register fd with the epoll instance as a new descriptor. */
    const int rc = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
    if (rc < 0) {
        ALOGE("epoll_ctl for psi monitor failed; errno=%d", errno);
    }
    return rc;
}

/*
 * lmkd entry point.
 *
 * With "--reinit" as the first argument, clears LMKD_REINIT_PROP and returns the result of
 * issue_reinit(). Otherwise loads the properties, creates the kill-info event logger and
 * runs init(); when init() succeeds (returns 0) it optionally locks memory and switches to
 * SCHED_FIFO (only when the in-kernel interface is not used), starts the reaper and the
 * watchdog, and enters mainloop().
 *
 * Returns -1 when update_props() fails.
 * NOTE(review): returns 0 after mainloop() exits and also when init() fails — confirm that
 * a zero exit status on init failure is intended.
 */
int main(int argc, char **argv) {
    if ((argc > 1) && argv[1] && !strcmp(argv[1], "--reinit")) {
        if (property_set(LMKD_REINIT_PROP, "")) {
            ALOGE("Failed to reset " LMKD_REINIT_PROP " property");
        }
        return issue_reinit();
    }
    /* Refresh the lmk-related property values. */
    if (!update_props()) {
        ALOGE("Failed to initialize props, exiting.");
        return -1;
    }
    /* Event-log context used for kill info. */
    ctx = create_android_logger(KILLINFO_LOG_TAG);
    /* init() returns 0 on success, so this branch runs only when initialization succeeded. */
    if (!init()) {
        if (!use_inkernel_interface) {
            /*
             * MCL_ONFAULT pins pages as they fault instead of loading
             * everything immediately all at once. (Which would be bad,
             * because as of this writing, we have a lot of mapped pages we
             * never use.) Old kernels will see MCL_ONFAULT and fail with
             * EINVAL; we ignore this failure.
             *
             * N.B. read the man page for mlockall. MCL_CURRENT | MCL_ONFAULT
             * pins ⊆ MCL_CURRENT, converging to just MCL_CURRENT as we fault
             * in pages.
             */
            /* CAP_IPC_LOCK required */
            /*
             * Lock the process address space into physical memory so its pages are not
             * moved to swap, even if they have not been accessed for a while.
             */
            if (mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT) && (errno != EINVAL)) {
                ALOGW("mlockall failed %s", strerror(errno));
            }

            /* CAP_NICE required */
            struct sched_param param = {
                    .sched_priority = 1,
            };
            /* Switch to the first-in-first-out real-time scheduling policy. */
            if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                ALOGW("set SCHED_FIFO failed %s", strerror(errno));
            }
        }

        if (init_reaper()) {
            ALOGI("Process reaper initialized with %d threads in the pool",
                reaper.thread_cnt());
        }
        /* Watchdog initialization; a failure is logged but not fatal. */
        if (!watchdog.init()) {
            ALOGE("Failed to initialize the watchdog");
        }
        /* Enter the main event loop. */
        mainloop();
    }

    android_log_destroy(&ctx);

    ALOGI("exiting");
    return 0;
}

方案记录:

1.mem_free：剩余可分配内存大小，每个策略对应不同的剩余内存大小阈值

2. pgscan_kswapd：间接回收的内存页数，通过前后两次的数量差和时间差，计算出单位时间内 kswapd 回收的内存页数，来表示 kswapd 的活跃状态

3. swap_free：Swap的剩余大小，每个策略对应不同的交换分区内存大小阈值

4. pgscan_direct：直接回收的内存页数，通过前后两次的数量差和时间差，计算出单位时间内直接回收内存页数，来表示内存的压力情况

5. mem_cache

6. mem_available_kb

7. mem_total_kb

8. wmark

9. swap_total

策略记录:

1. 当CPU 占有率超过 85% 开始杀. adj >=900

2. 使用kill进程的方法，不是forcestop 方法。

获取CPU 占用率

计算CPU 占用率:

安卓cpu信息查看与cpu占用率计算_android cpu 占用率-CSDN博客

代码:

1.androidu/system/memory/lmkd/include/lmkd.h

enum lmk_cmd {
LMK_TARGET = 0, /* Associate minfree with oom_adj_score */
LMK_PROCPRIO, /* Register a process and set its oom_adj_score */
LMK_PROCREMOVE, /* Unregister a process */
LMK_PROCPURGE, /* Purge all registered processes */
LMK_GETKILLCNT, /* Get number of kills */
LMK_SUBSCRIBE, /* Subscribe for asynchronous events */
LMK_PROCKILL, /* Unsolicited msg to subscribed clients on proc kills */
LMK_UPDATE_PROPS, /* Reinit properties */
LMK_STAT_KILL_OCCURRED, /* Unsolicited msg to subscribed clients on proc kills for statsd log */
LMK_STAT_STATE_CHANGED, /* Unsolicited msg to subscribed clients on state changed */
+ LMK_STAT_LEVEL_CHANGED,
};

-#define MAX_TARGETS 6
+#define MAX_TARGETS 16

+/* LMK_LEVEL packet payload */
+struct lmk_level_data {
+ int level;
+ int64_t free_swap_kb;
+ int64_t free_mem_kb;
+ int64_t pgscan_kswapd;
+ int64_t pgscan_direct;
+ int64_t mem_cache_kb;
+ int64_t mem_available_kb;
+ int64_t mem_total_kb;
+ int64_t swap_total_kb;
+ int wmark;
+};

+/*
+ * Prepare an LMK_STAT_LEVEL_CHANGED packet.
+ *
+ * The packet consists of 11 network-order 32-bit words: the command followed by the ten
+ * lmk_level_data fields in declaration order. The 64-bit fields are narrowed to 32 bits
+ * on the wire, which matches the receiver shown in this patch (it reads ten ints).
+ * NOTE(review): values above 32 bits (e.g. long-running pgscan counters) are truncated —
+ * widen both sender and receiver together if that matters.
+ *
+ * Returns the number of bytes of packet that were filled in.
+ */
+static inline size_t lmkd_pack_set_level_changed(LMKD_CTRL_PACKET packet,struct lmk_level_data data) {
+ packet[0] = htonl(LMK_STAT_LEVEL_CHANGED);
+ packet[1] = htonl((uint32_t)data.level);
+ packet[2] = htonl((uint32_t)data.free_swap_kb);
+ packet[3] = htonl((uint32_t)data.free_mem_kb);
+ packet[4] = htonl((uint32_t)data.pgscan_kswapd);
+ packet[5] = htonl((uint32_t)data.pgscan_direct);
+ packet[6] = htonl((uint32_t)data.mem_cache_kb);
+ packet[7] = htonl((uint32_t)data.mem_available_kb);
+ packet[8] = htonl((uint32_t)data.mem_total_kb);
+ packet[9] = htonl((uint32_t)data.swap_total_kb);
+ packet[10] = htonl((uint32_t)data.wmark);
+ /*
+  * Exactly 11 words were written above. The previous size expression
+  * (3 * sizeof(int) + 8 * sizeof(int64_t)) exceeded that and would have sent
+  * uninitialized packet bytes to the client.
+  */
+ return 11 * sizeof(packet[0]);
+}

2. androidu/system/memory/lmkd/lmkd.cpp

/* Fields to parse in /proc/meminfo */
enum meminfo_field {
- MI_NR_FREE_PAGES = 0,
+ MI_MEM_TOTAL = 0,
+ MI_NR_FREE_PAGES,
+ MI_MEM_AVAILABLE,
MI_CACHED,
MI_SWAP_CACHED,
MI_BUFFERS,

static const char* const meminfo_field_names[MI_FIELD_COUNT] = {
+ "MemTotal:",
"MemFree:",
+ "MemAvailable:",
"Cached:",
"SwapCached:",
"Buffers:",

union meminfo {
struct {
+ int64_t mem_total_kb;
int64_t nr_free_pages;
+ int64_t nr_mem_available;
int64_t cached;
int64_t swap_cached;
int64_t buffers;
@@ -770,6 +776,7 @@ static ssize_t ctrl_data_read(int dsock_idx, char* buf, size_t bufsz, struct ucr
}

static int ctrl_data_write(int dsock_idx, char* buf, size_t bufsz) {
+ ALOGE("ctrl_data_write");
int ret = 0;

ret = TEMP_FAILURE_RETRY(write(data_sock[dsock_idx].sock, buf, bufsz));
@@ -789,6 +796,7 @@ static int ctrl_data_write(int dsock_idx, char* buf, size_t bufsz) {
* will receive this unsolicited notification.
*/
static void ctrl_data_write_lmk_kill_occurred(pid_t pid, uid_t uid) {
+ ALOGE("ctrl_data_write_lmk_kill_occurred pid:%i ,uid:%i", pid,uid);
LMKD_CTRL_PACKET packet;
size_t len = lmkd_pack_set_prockills(packet, pid, uid);

@@ -799,11 +807,26 @@ static void ctrl_data_write_lmk_kill_occurred(pid_t pid, uid_t uid) {
}
}

+
+/*
+ * Pack the given level data and write it to every connected data socket whose
+ * async_event_mask has the LMK_ASYNC_EVENT_KILL bit set.
+ */
+static void ctrl_data_write_lmk_level_changed(lmk_level_data data) {
+ /* NOTE(review): looks like a debug trace emitted at error severity — confirm. */
+ ALOGE("ctrl_data_write_lmk_level_changed");
+ LMKD_CTRL_PACKET packet;
+ size_t len = lmkd_pack_set_level_changed(packet,data);
+
+ for (int i = 0; i < MAX_DATA_CONN; i++) {
+ /* Level updates reuse the kill-event subscription bit; no dedicated bit is visible here. */
+ if (data_sock[i].sock >= 0 && data_sock[i].async_event_mask & 1 << LMK_ASYNC_EVENT_KILL) {
+ ctrl_data_write(i, (char*)packet, len);
+ }
+ }
+}
+

/*
* Write the kill_stat/memory_stat over the data socket to be propagated via AMS to statsd
*/
static void stats_write_lmk_kill_occurred(struct kill_stat *kill_st,
struct memory_stat *mem_st) {
+ ALOGE("stats_write_lmk_kill_occurred");
LMK_KILL_OCCURRED_PACKET packet;
const size_t len = lmkd_pack_set_kill_occurred(packet, kill_st, mem_st);
if (len == 0) {
@@ -830,6 +853,7 @@ static void stats_write_lmk_kill_occurred_pid(int pid, struct kill_stat *kill_st
* Write the state_changed over the data socket to be propagated via AMS to statsd
*/
static void stats_write_lmk_state_changed(enum lmk_state state) {
+ ALOGE("stats_write_lmk_state_changed");
LMKD_CTRL_PACKET packet_state_changed;
const size_t len = lmkd_pack_set_state_changed(packet_state_changed, state);
if (len == 0) {
@@ -1473,6 +1497,7 @@ static void cmd_target(int ntargets, LMKD_CTRL_PACKET packet) {
}

static void ctrl_command_handler(int dsock_idx) {
+ ALOGE("ctrl_command_handler");
LMKD_CTRL_PACKET packet;
struct ucred cred;
int len;
@@ -1492,6 +1517,7 @@ static void ctrl_command_handler(int dsock_idx) {
}

cmd = lmkd_pack_get_cmd(packet);
+ ALOGE("ctrl_command_handler command code %d", cmd);
nargs = len / sizeof(int) - 1;
if (nargs < 0)
goto wronglen;
@@ -1578,6 +1604,7 @@ wronglen:

static void ctrl_data_handler(int data, uint32_t events,
struct polling_params *poll_params __unused) {
+ ALOGI("ctrl_data_handler");
if (events & EPOLLIN) {

static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_params) {
+
+ ALOGE("mp_event_psi");

if (!psi_parse_mem(&psi_data)) {
critical_stall = psi_data.mem_stats[PSI_FULL].avg10 > (float)stall_limit_critical;
}
+
+ //上报:mem_free, swap_free, pgscan_kswpad, pgscan_direct
+
+ ALOGE("mp_event_psi level:%s,free_swap_kb:%ld,free_mem_kb:%ld,pgscan_kswapd:%ld,pgscan_direct:%ld",level_name[level],get_free_swap(&mi) * page_k,mi.field.nr_free_pages * page_k,vs.field.pgscan_kswapd,vs.field.pgscan_direct);
+
+
+ ALOGE("mp_event_psi level:%s,page_k:%ld,mem_cache:%ld,mem_available_kb:%ld,mem_total_kb:%ld,swap_total:%ld,wmark:%d",level_name[level],page_k,mi.field.cached,mi.field.nr_mem_available,mi.field.mem_total_kb,mi.field.total_swap,wmark);
+
+ struct lmk_level_data level_data;
+ level_data.level = level;
+ level_data.free_swap_kb = get_free_swap(&mi) * page_k;
+ level_data.free_mem_kb = mi.field.nr_free_pages * page_k;
+ level_data.pgscan_kswapd = vs.field.pgscan_kswapd;
+ level_data.pgscan_direct = vs.field.pgscan_direct;
+ level_data.mem_cache_kb = mi.field.cached;
+ level_data.mem_available_kb = mi.field.nr_mem_available;
+ level_data.mem_total_kb = mi.field.mem_total_kb;
+ level_data.swap_total_kb = mi.field.total_swap;
+ level_data.wmark = wmark;
+ //if(level_changed){
+ ctrl_data_write_lmk_level_changed(level_data);
+ //}

3. androidu/frameworks/base/services/core/java/com/android/server/am/LmkdConnection.java

- private static final String TAG = TAG_WITH_CLASS_NAME ? "LmkdConnection" : TAG_AM;
+ private static final String TAG = "LmkdConnection";

4. androidu/frameworks/base/services/core/java/com/android/server/am/ProcessList.java

+import java.io.FileReader;
+import java.io.BufferedReader;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;

- static final String TAG = TAG_WITH_CLASS_NAME ? "ProcessList" : TAG_AM;
+ static final String TAG = "ProcessList";

static final byte LMK_STATE_CHANGED = 9; // Msg to subscribed clients on state changed
+ static final byte LMK_LEVEL_CHANGED = 10;

LmkdStatsReporter.logStateChanged(state);
return true;
+ case LMK_LEVEL_CHANGED:
+ Slog.i(TAG, "handleUnsolicitedMessage LMK_LEVEL_CHANGED");
+ int level = inputData.readInt();
+ long freeSwapKb = inputData.readInt();
+ long freeMemKb = inputData.readInt();
+ long pgscanKswapd = inputData.readInt();
+ long pgscanDirect = inputData.readInt();
+ long memCacheKb = inputData.readInt();
+ long memAvailableKb = inputData.readInt();
+ long memTotalKb = inputData.readInt();
+ long swapTotalKb = inputData.readInt();
+ long wmark = inputData.readInt();
+
+
+ Slog.i(TAG, "handleUnsolicitedMessage LMK_LEVEL_CHANGED level:" + level +",freeSwapKb:"+freeSwapKb +",freeMemKb:"+freeMemKb+",pgscanKswapd:"+pgscanKswapd+",pgscanDirect:"+pgscanDirect+",memCacheKb:"+memCacheKb+",memAvailableKb:"+memAvailableKb+",memTotalKb:"+memTotalKb+",swapTotalKb:"+swapTotalKb+",wmark:"+wmark);
+
+ Slog.i(TAG, "handleUnsolicitedMessage LMK_LEVEL_CHANGED cpu rate:" + getCPURate());
+ return true;
default:
return false;

+
+ /**
+  * Draft helper that scans a snapshot of the LRU process list. Processes that have no
+  * thread, are crashing, or are not responding are skipped; packages owning a process
+  * whose adj is below minAdj are added to skipPkgSet and removed from killTargetMap.
+  *
+  * NOTE(review): minAdj, skipPkgSet, killTargetMap, DEBUG and DEBUG_MORE are not declared
+  * in the visible patch and must be provided by the enclosing class — confirm. The loop
+  * performs no action for processes that pass all checks, so the method looks unfinished.
+  */
+ private void getKillApp(){
+     // Snapshot the currently running processes while holding the service lock.
+     ArrayList<ProcessRecord> procs = new ArrayList<>();
+     synchronized(mService) {
+         procs.addAll(mService.mProcessList.getLruProcessesLOSP());
+     }
+
+     for (int i = 0; i < procs.size(); i++) {
+         ProcessRecord app = procs.get(i);
+         final ProcessErrorStateRecord errState = app.mErrorState;
+         if (app.getThread() == null) {
+             Slog.w(TAG, "skip"+ app.processName + " by app.getThread( ) == null");
+             continue;
+         }
+         if (errState.isCrashing()) {
+             Slog.w(TAG, "skip" + app.processName + " by crashing");
+             continue;
+         }
+
+         if (errState.isNotResponding()) {
+             Slog.w(TAG, "skip"+ app.processName +"by NotResponding");
+             continue;
+         }
+         ApplicationInfo appInfo = app.info;
+         int appAdj = app.mState.getSetAdj();
+         ProcessStateRecord state = app.mState;
+         if (DEBUG) {
+             Slog.d(TAG, "checking process:" + app.processName + "，adj:" + appAdj + ", state:" + state.getCurProcState()
+                     +"，adjtype:"+ state.getAdjType() + ",cached:"+ state.isCached());
+         }
+
+         if (appAdj < minAdj) {
+             // process adj lower than minAdj, this package should not be killed
+             skipPkgSet.add(appInfo.packageName);
+             if (killTargetMap.containsKey(appInfo.packageName)){
+                 killTargetMap.remove(appInfo.packageName);
+                 if (DEBUG_MORE) {
+                     Slog.d(TAG, "skip pkg, adj: " + appAdj + "procName:" + app.processName + "pkgname:" + appInfo.packageName);
+                 }
+                 continue;
+             }
+         }
+     }
+ }
+
+
+
+ /**
+ * ^ 表示匹配行的开头。
+ * cpu 匹配 "cpu" 这个单词。
+ * \\s+ 匹配一个或多个空格字符。
+ * (\\d+\\s+){9} 匹配由一个或多个数字加上一个或多个空格字符组成的序列，重复9次。
+ * \\d+ 匹配一个或多个数字。
+ * $ 匹配行的结尾。因此，整个正则表达式可以匹配以 "cpu" 开头，后面跟着10个由空格分隔的数字的行。
+ */
+private static String getCPURate() {
+ String path = "/proc/stat";// 系统CPU信息文件
+ long Totaljiffies[] = new long[2];
+ long totalIdle[] = new long[2];
+ FileReader fileReader = null;
+ BufferedReader bufferedReader = null;
+ Pattern pattern = Pattern.compile("^cpu\\s+(\\d+\\s+){9}\\d+$", Pattern.MULTILINE);
+ //正则表达式，只获取第一行
+ for (int i = 0; i < 2; i++) { //每一次调用分为两次获取方便求差
+ Totaljiffies[i] = 0;
+ totalIdle[i] = 0;
+ try {
+ fileReader = new FileReader(path);
+ bufferedReader = new BufferedReader(fileReader, 8192);
+ String str;
+ while ((str = bufferedReader.readLine()) != null) { //读取stat信息
+ if (str.toLowerCase().startsWith("cpu")) {//以cpu开头的
+ Matcher matcher = pattern.matcher(str);//直接获取第一行cpu开头的数据
+ // 不需要cpu0-7的，那样的话还得多几步运算
+ while (matcher.find()) {
+ String[] values = extractValues(matcher.group());
+ Totaljiffies[i] = sumValues(values);
+ totalIdle[i] = Long.parseLong(values[3]);
+ }
+ }
+ if(i==0){//第一次获取后进行延时等待系统更新信息
+ try {
+ Thread.sleep(100);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+ } catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } finally {
+ if (bufferedReader != null) {
+ try {
+ bufferedReader.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+ }
+ double rate = 0;
+ if (Totaljiffies[1] > Totaljiffies[0] ) {//正常情况下第二次总的jiffies一定比第一次获得的数据大
+ rate = 1.0 * ((Totaljiffies[1] - totalIdle[1]) - (Totaljiffies[0] - totalIdle[0]))
+ / (Totaljiffies[1] - Totaljiffies[0]);
+ }
+ return String.valueOf(rate);
+ }
+ /**
+  * Splits a line on whitespace and drops its first token (the "cpu" label of a
+  * /proc/stat line), returning the remaining tokens in order.
+  *
+  * @param input whitespace-separated line
+  * @return all tokens after the first one
+  */
+ public static String[] extractValues(String input) {
+     final String[] tokens = input.split("\\s+");
+     // One slot fewer than the token count because the leading label is discarded.
+     final String[] values = new String[tokens.length - 1];
+     for (int i = 1; i < tokens.length; i++) {
+         values[i - 1] = tokens[i];
+     }
+     return values;
+ }
+ /**
+ * 求数组和
+ * @param input
+ * @return
+ */
+ public static Long sumValues(String input[]) {
+ Long sum = Long.valueOf(0);
+ for (String value : input) {
+ sum += Integer.parseInt(value);
+ }
+ return sum;
+ }

5. androidu/system/core/rootdir/init.rc

修改权限:

# make the PSI monitor accessible to others

chown system system /proc/pressure/cpu
chmod 0664 /proc/pressure/cpu

应用待机分组:

frameworks/base/core/java/android/app/usage/UsageStatsManager.java

getAppStandbyBucket(String packageName)

    /**
     * The app was used very recently, currently in use or likely to be used very soon. Standby
     * bucket values that are &le; {@link #STANDBY_BUCKET_ACTIVE} will not be throttled by the
     * system while they are in this bucket. Buckets &gt; {@link #STANDBY_BUCKET_ACTIVE} will most
     * likely be restricted in some way. For instance, jobs and alarms may be deferred.
     * @see #getAppStandbyBucket()
     */
    //应用目前正在使用中，或者最近刚刚使用过
    public static final int STANDBY_BUCKET_ACTIVE = 10;

    /**
     * The app was used recently and/or likely to be used in the next few hours. Restrictions will
     * apply to these apps, such as deferral of jobs and alarms.
     * @see #getAppStandbyBucket()
     */
    //应用会定期使用
    public static final int STANDBY_BUCKET_WORKING_SET = 20;

    /**
     * The app was used in the last few days and/or likely to be used in the next few days.
     * Restrictions will apply to these apps, such as deferral of jobs and alarms. The delays may be
     * greater than for apps in higher buckets (lower bucket value). Bucket values &gt;
     * {@link #STANDBY_BUCKET_FREQUENT} may additionally have network access limited.
     * @see #getAppStandbyBucket()
     */
    //应用会经常使用，但不会每天使用
    public static final int STANDBY_BUCKET_FREQUENT = 30;

    /**
     * The app has not be used for several days and/or is unlikely to be used for several days.
     * Apps in this bucket will have more restrictions, including network restrictions, except
     * during certain short periods (at a minimum, once a day) when they are allowed to execute
     * jobs, access the network, etc.
     * @see #getAppStandbyBucket()
     */
    //应用不经常使用
    public static final int STANDBY_BUCKET_RARE = 40;

    /**
     * The app has not be used for several days, is unlikely to be used for several days, and has
     * been misbehaving in some manner.
     * Apps in this bucket will have the most restrictions, including network restrictions and
     * additional restrictions on jobs.
     * <p> Note: this bucket is not enabled in {@link Build.VERSION_CODES#R}.
     * @see #getAppStandbyBucket()
     */
    //受限制
    public static final int STANDBY_BUCKET_RESTRICTED = 45;

    /**
     * The app has never been used.
     * {@hide}
     */
    @SystemApi
  //已安装但从未运行过的应用，系统会对这些应用施加严格的限制 
    public static final int STANDBY_BUCKET_NEVER = 50;

计算CPU 占有率:

/**
 * Samples the aggregate "cpu" line of /proc/stat twice, 100 ms apart, and returns the
 * fraction of non-idle jiffies over that interval as a decimal string.
 *
 * <p>The pattern only accepts a line that starts with "cpu", is followed by whitespace and
 * carries exactly ten whitespace-separated counters, so per-core lines such as "cpu0" are
 * ignored. The fourth counter is treated as idle time.
 *
 * @return the busy fraction as produced by {@code String.valueOf(double)}; "0.0" when the
 *         second sample's total is not greater than the first
 */
private static String getCPURate() {
    final String path = "/proc/stat"; // system CPU statistics file
    final Pattern pattern = Pattern.compile("^cpu\\s+(\\d+\\s+){9}\\d+$", Pattern.MULTILINE);
    final long[] totalJiffies = new long[2];
    final long[] totalIdle = new long[2];

    for (int i = 0; i < 2; i++) { // two samples so a delta can be computed
        // try-with-resources closes the reader (and the wrapped FileReader) on every path.
        try (BufferedReader reader = new BufferedReader(new FileReader(path), 8192)) {
            String line;
            while ((line = reader.readLine()) != null) {
                final Matcher matcher = pattern.matcher(line);
                if (!matcher.find()) {
                    continue;
                }
                final String[] values = extractValues(matcher.group());
                totalJiffies[i] = sumValues(values);
                totalIdle[i] = Long.parseLong(values[3]);
                break; // only the aggregate line can match; no need to read further
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        if (i == 0) {
            // Wait once between the two samples. The sleep used to sit inside the read
            // loop, which delayed the first sample by 100 ms for every line of the file.
            try {
                Thread.sleep(100);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt(); // keep the interrupt visible to callers
            }
        }
    }

    double rate = 0;
    if (totalJiffies[1] > totalJiffies[0]) { // the second total is normally the larger one
        rate = 1.0 * ((totalJiffies[1] - totalIdle[1]) - (totalJiffies[0] - totalIdle[0]))
                / (totalJiffies[1] - totalJiffies[0]);
    }
    return String.valueOf(rate);
}
/**
 * Splits a line on whitespace and drops its first token (the "cpu" label of a
 * /proc/stat line), returning the remaining tokens in order.
 *
 * @param input whitespace-separated line
 * @return all tokens after the first one
 */
public static String[] extractValues(String input) {
    final String[] tokens = input.split("\\s+");
    // One slot fewer than the token count because the leading label is discarded.
    final String[] values = new String[tokens.length - 1];
    for (int i = 1; i < tokens.length; i++) {
        values[i - 1] = tokens[i];
    }
    return values;
}
/**
 * Sums an array of decimal number strings.
 *
 * <p>Values are parsed as {@code long}: parsing them as {@code int} throws
 * NumberFormatException as soon as a counter exceeds Integer.MAX_VALUE.
 *
 * @param input decimal strings to add up
 * @return the sum, 0 for an empty array
 */
public static Long sumValues(String input[]) {
    long sum = 0L; // primitive accumulator avoids boxing on every iteration
    for (String value : input) {
        sum += Long.parseLong(value);
    }
    return sum;
}

一、epoll_wait函数

1.1、函数定义

int epoll_wait(int epfd, struct epoll_event * events, int maxevents, int timeout);

作用： 等待监听的所有fd相应事件的产生。

1.2、参数详解：

1) int epfd： epoll_create()函数返回的epoll实例的句柄。

2) struct epoll_event * events： 接口的返回参数，epoll把发生的事件的集合从内核复制到 events数组中。events数组是一个用户分配好大小的数组，数组长度大于等于maxevents。（events不可以是空指针，内核只负责把数据复制到这个 events数组中，不会去帮助我们在用户态中分配内存）

3) int maxevents： 表示本次可以返回的最大事件数目，通常maxevents参数与预分配的events数组的大小是相等的。

4) int timeout： 表示在没有检测到事件发生时最多等待的时间，超时时间(>=0)，单位是毫秒ms，-1表示阻塞，0表示不阻塞。

1.3、返回值：

成功时返回需要处理的事件数目；返回 0 表示在超时时间内没有事件发生（等待超时）；失败时返回 -1，并设置 errno。

二. `epoll事件延迟:`

在Linux中，epoll 本身并没有优先级的概念，因为 epoll 是一种事件通知机制，它主要用于高效地处理大量文件描述符上的 I/O 事件。但是，您可以通过一些方法来提高 epoll 监控的文件描述符的优先级或处理速度，以改善整体性能：

优化事件处理逻辑：确保事件处理逻辑高效，避免阻塞和耗时的操作。尽量减少事件处理器中的计算量，确保事件得到及时处理。
合理设置超时时间：在调用 epoll_wait 时，合理设置超时时间，以便在有事件发生时及时返回，避免不必要的等待。
合理设置触发模式：根据实际需求选择合适的触发模式（默认是水平触发 LT；加上 EPOLLET 标志则为边缘触发 ET。EPOLLIN、EPOLLOUT 是事件类型，而不是触发模式），以提高事件的响应速度。
使用多线程/进程：将 epoll 监控的文件描述符的处理逻辑放在独立的线程或进程中，以提高并发处理能力。
系统优化：对系统进行优化，包括调整内核参数、增加系统资源等，以提高整体性能。
使用更高级别的框架：考虑使用更高级别的框架或库，如 libevent、Boost.Asio 等，它们可以在 epoll 的基础上提供更加方便和高效的事件处理机制。
性能分析和调优：使用性能分析工具对程序进行分析，找出性能瓶颈并进行优化，以提高 epoll 的处理速度。

需要注意的是，尽管可以通过上述方法来提高 epoll 监控的文件描述符的处理速度和效率，但是 epoll 本身并没有优先级的概念。您应该根据具体的应用场景和需求，结合上述建议来优化 epoll 的性能。

惠(xi)斌

关注

15
点赞
踩
19

收藏

觉得还不错? 一键收藏
0
评论
LMKD 调试记录

if (Totaljiffies[1] > Totaljiffies[0] ) {//正常情况下第二次总的jiffies一定比第一次获得的数据大。+ if (str.toLowerCase().startsWith("cpu")) {//以cpu开头的。if (str.toLowerCase().startsWith("cpu")) {//以cpu开头的。
复制链接

扫一扫