最近工作有需要通过PSI在某些场景下控制内存的相关操作,所以了解了一下PSI。
PSI(Pressure Stall Information,压力阻塞信息)可以用来监控内存、CPU、IO 的资源压力,并接收相应的通知,其节点位于 /proc/pressure 下。由于目前工作重心主要集中在内存上,所以下文只讨论 PSI 内存节点的源码分析与使用。
1. 内核PSI节点定义
kernel/msm-4.19/kernel/sched/psi.c
/*
 * File operations for the memory pressure proc entry; psi_proc_init()
 * passes this table to proc_create() for "pressure/memory".
 */
static const struct file_operations psi_memory_fops = {
.open = psi_memory_open,
.read = seq_read,
.llseek = seq_lseek,
/*
 * write and poll handlers: the userspace usage example writes a trigger
 * string to this file and then poll()s the same descriptor.
 */
.write = psi_memory_write,
.poll = psi_fop_poll,
.release = psi_fop_release,
};
/*
 * Create the "pressure" proc directory and its io, memory and cpu entries,
 * each bound to its own file_operations table.
 *
 * Always returns 0; the results of proc_mkdir()/proc_create() are not
 * checked here.
 */
static int __init psi_proc_init(void)
{
proc_mkdir("pressure", NULL);
/* All three entries are created with a mode argument of 0. */
proc_create("pressure/io", 0, NULL, &psi_io_fops);
proc_create("pressure/memory", 0, NULL, &psi_memory_fops);
proc_create("pressure/cpu", 0, NULL, &psi_cpu_fops);
return 0;
}
2. psi控制
通过向节点写入(write)可以设置触发器,下面的示例在用户空间进行设置:
在任何 1 秒的时间窗口内,如果一个或多个进程因为等待内存而造成的停顿时间累计超过了阈值 150ms,将触发通知事件。
当用于定义触发器的 PSI 接口文件描述符被关闭时,触发器将被取消注册。
kernel/msm-4.19/Documentation/accounting/psi.txt
Userspace monitor usage example
===============================
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <poll.h>
#include <string.h>
#include <unistd.h>
/*
 * Monitor memory partial stall with 1s tracking window size
 * and 150ms threshold.
 *
 * Returns 1 on open/write/poll failure or when an unexpected event is
 * reported, and 0 when POLLERR signals that the event source is gone.
 */
int main() {
/* Trigger string: stall type, then 150000 and 1000000 (the 150ms threshold and 1s window, expressed in microseconds). */
const char trig[] = "some 150000 1000000";
struct pollfd fds;
int n;
fds.fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);
if (fds.fd < 0) {
printf("/proc/pressure/memory open error: %s\n",
strerror(errno));
return 1;
}
/* The trigger is reported as a priority event on this descriptor. */
fds.events = POLLPRI;
/* strlen(trig) + 1: the terminating NUL byte is written as well. */
if (write(fds.fd, trig, strlen(trig) + 1) < 0) {
printf("/proc/pressure/memory write error: %s\n",
strerror(errno));
return 1;
}
printf("waiting for events...\n");
while (1) {
/* Timeout of -1: block until an event arrives. */
n = poll(&fds, 1, -1);
if (n < 0) {
printf("poll error: %s\n", strerror(errno));
return 1;
}
if (fds.revents & POLLERR) {
printf("got POLLERR, event source is gone\n");
return 0;
}
if (fds.revents & POLLPRI) {
printf("event triggered!\n");
} else {
printf("unknown event received: 0x%x\n", fds.revents);
return 1;
}
}
/* Not reached: the loop above only exits through a return statement. */
return 0;
}
3. LMKD控制PSI
只贴调用栈(细节自己看),在下面的调用栈里,lmkd通过第二节官方推荐的方式控制PSI的力度
system/core/lmkd/lmkd.c main()
--->system/core/lmkd/lmkd.c init()
--->system/core/lmkd/lmkd.c init_psi_monitors()
--->system/core/lmkd/lmkd.c init_mp_psi()
--->system/core/lmkd/libpsi/psi.c init_psi_monitor()
在init_psi_monitors函数里,lmkd设置了四种psi的监控,去监控/proc/pressure/memory。
- VMPRESS_LEVEL_LOW
- VMPRESS_LEVEL_MEDIUM
- VMPRESS_LEVEL_CRITICAL
- VMPRESS_LEVEL_SUPER_CRITICAL
分别对应着以下几种监控力度:
/*
 * Per-level PSI trigger settings; init_mp_psi() indexes this table by
 * vmpressure level and reads each entry's stall_type and threshold_ms.
 * NOTE(review): the entries presumably follow the enum order
 * LOW, MEDIUM, CRITICAL, SUPER_CRITICAL - confirm against enum vmpressure_level.
 */
static struct psi_threshold psi_thresholds[VMPRESS_LEVEL_COUNT] = {
{ PSI_SOME, 70 }, /* 70ms out of 1sec for partial stall */
{ PSI_SOME, 100 }, /* 100ms out of 1sec for partial stall */
{ PSI_FULL, 70 }, /* 70ms out of 1sec for complete stall */
{ PSI_FULL, PSI_SCRIT_COMPLETE_STALL_MS }, /* Default 80ms out of 1sec for complete stall */
};
init_mp_psi函数中设置监控力度:
/*
 * Set up the PSI monitor for one vmpressure level.
 *
 * Creates a monitor from psi_thresholds[level], with the threshold and
 * psi_window_size_ms both scaled by US_PER_MS. The resulting fd is passed
 * to register_psi_monitor() together with epollfd and the level's handler
 * info (handler mp_event_common, data = level).
 *
 * Returns true on success. Returns false if the monitor cannot be created,
 * or if registration fails, in which case the fd is destroyed first.
 */
static bool init_mp_psi(enum vmpressure_level level) {
int fd = init_psi_monitor(psi_thresholds[level].stall_type,
psi_thresholds[level].threshold_ms * US_PER_MS,
psi_window_size_ms * US_PER_MS);
if (fd < 0) {
return false;
}
vmpressure_hinfo[level].handler = mp_event_common;
vmpressure_hinfo[level].data = level;
if (register_psi_monitor(epollfd, fd, &vmpressure_hinfo[level]) < 0) {
/* Registration failed: release the monitor fd before bailing out. */
destroy_psi_monitor(fd);
return false;
}
/* Bookkeeping only after both steps succeeded. */
maxevents++;
mpevfd[level] = fd;
return true;
}
4. 安卓framework中获取PSI信息
在framework中,通过getMemFactor方法得到PSI内存的压力信息,有四个级别
- MEM_PRESSURE_NONE
- MEM_PRESSURE_LOW
- MEM_PRESSURE_MEDIUM
- MEM_PRESSURE_HIGH
在LowMemDetector.java的内部类LowMemThread中,线程会阻塞在本地方法waitForPressure中,等待接收PSI的通知。
frameworks/base/services/core/java/com/android/server/am/LowMemDetector.java
// Most recently reported pressure level; starts at MEM_PRESSURE_NONE.
// Every access in this excerpt happens while holding mPressureStateLock.
private int mPressureState = MEM_PRESSURE_NONE;
/* getPressureState return values */
// NOTE(review): the accessor shown in this excerpt is getMemFactor(); it returns mPressureState.
public static final int MEM_PRESSURE_NONE = 0;
public static final int MEM_PRESSURE_LOW = 1;
public static final int MEM_PRESSURE_MEDIUM = 2;
public static final int MEM_PRESSURE_HIGH = 3;
/**
 * Returns the current value of mPressureState, read under mPressureStateLock.
 *
 * @return the stored pressure level (initially MEM_PRESSURE_NONE)
 */
public int getMemFactor() {
synchronized (mPressureStateLock) {
return mPressureState;
}
}
// Native methods. NOTE(review): init() is not called anywhere in this excerpt;
// presumably it sets up the native side before waitForPressure() is used - confirm.
private native int init();
// Blocks in native code until a pressure event arrives; LowMemThread treats a
// return value of -1 as a failure and any other value as the new pressure state.
private native int waitForPressure();
/**
 * Background thread that repeatedly calls waitForPressure() and stores each
 * result in mPressureState. It stops, and clears mAvailable, when the native
 * call returns -1.
 */
private final class LowMemThread extends Thread {
public void run() {
while (true) {
// sleep waiting for a PSI event
int newPressureState = waitForPressure();
if (newPressureState == -1) {
// epoll broke, tear this down
mAvailable = false;
break;
}
// got a PSI event? let's update lowmem info
// The write is done under the same lock that getMemFactor() takes for its read.
synchronized (mPressureStateLock) {
mPressureState = newPressureState;
}
}
}
}
frameworks/base/services/core/jni/com_android_server_am_LowMemDetector.cpp
/**
 * Waits for the next memory pressure change and returns the new level.
 *
 * While the last reported level is PRESSURE_NONE the call blocks in
 * epoll_wait() without a timeout. Otherwise it waits at most 1s; if no
 * event arrives in that window the level drops back to PRESSURE_NONE.
 * The level is kept in a function-local static between calls.
 *
 * @return the highest level among the received events, PRESSURE_NONE when
 *         the 1s window expires without events, or -1 when epoll_wait()
 *         fails (other than EINTR) or an event carries EPOLLERR/EPOLLHUP.
 */
static jint android_server_am_LowMemDetector_waitForPressure(JNIEnv*, jobject) {
    static uint32_t pressure_level = PRESSURE_NONE;
    struct epoll_event events[PRESSURE_LEVEL_COUNT];
    int nevents = 0;

    do {
        if (pressure_level == PRESSURE_NONE) {
            /* Wait for events with no timeout */
            nevents = epoll_wait(psi_epollfd, events, PRESSURE_LEVEL_COUNT, -1);
        } else {
            // This is simpler than lmkd. Assume that the memory pressure
            // state will stay high for at least 1s. Within that 1s window,
            // the memory pressure state can go up due to a different FD
            // becoming available or it can go down when that window expires.
            // Accordingly, there's no polling: just epoll_wait with a 1s timeout.
            nevents = epoll_wait(psi_epollfd, events, PRESSURE_LEVEL_COUNT, 1000);
            if (nevents == 0) {
                pressure_level = PRESSURE_NONE;
                return pressure_level;
            }
        }
        // keep waiting if interrupted
    } while (nevents == -1 && errno == EINTR);

    // Any failure other than EINTR must be reported as -1. Falling through
    // would return PRESSURE_NONE, and the caller, which only stops on -1,
    // would keep calling into a broken epoll fd forever.
    if (nevents == -1) {
        ALOGE("Failed to wait for memory pressure events (errno=%d)", errno);
        return -1;
    }

    // reset pressure_level and raise it based on received events
    pressure_level = PRESSURE_NONE;
    for (int i = 0; i < nevents; i++) {
        if (events[i].events & (EPOLLERR | EPOLLHUP)) {
            // should never happen unless psi got disabled in kernel
            ALOGE("Memory pressure events are not available anymore");
            return -1;
        }
        // record the highest reported level
        if (events[i].data.u32 > pressure_level) {
            pressure_level = events[i].data.u32;
        }
    }
    return pressure_level;
}