Zygote意为“受精卵”,即所有的Android进程都是由其“发育”而来的意思。本人试着把啃代码学到的内容整理出来,希望会对自己以后有用。
Init
Init进程是Linux系统上的第一个用户进程,用户为root,拥有系统中最高的权限。它会实现挂载文件系统,并启动ServiceManager/Zygote两大进程,正式这两大进程构成了Android系统的基础。
简单来说,Init进程通过解析init.rc文件,然后执行自己的工作,其中,Zygote进程相关的配置如下(system/core/rootdir/init.rc):
service zygote /system/bin/app_process -Xzygote /system/bin --zygote --start-system-server
class main
socket zygote stream 660 root system
onrestart write /sys/android_power/request_state wake
onrestart write /sys/power/state on
onrestart restart media
onrestart restart netd
从上面,可以看到Zygote实际上执行的是/system/bin/app_process应用程序,参数中包含了--zygote和--start-system-server。 另外,
socket zygote stream 660 root system
属性的配置,说明Init进程在启动zygote进程时,会为zygote进程创建一个stream型的domain socket,并保存到ANDROID_SOCKET_zygote系统变量中。(对这个过程涉及到init进程的实现,可以参考代码system/core/init/init.c)
app_process
Init进程启动Zygote进程以后,代码执行到app_process的主入口(frameworks/base/cmds/app_process/app_main.cpp):
int main(int argc, const char* const argv[])
{
// These are global variables in ProcessState.cpp
mArgC = argc;
mArgV = argv;
mArgLen = 0;
for (int i=0; i<argc; i++) {
mArgLen += strlen(argv[i]) + 1;
}
mArgLen--;
AppRuntime runtime;
const char* argv0 = argv[0];
// Process command line arguments
// ignore argv[0]
argc--;
argv++;
// Everything up to '--' or first non '-' arg goes to the vm
int i = runtime.addVmArguments(argc, argv);
// Parse runtime arguments. Stop at first unrecognized option.
bool zygote = false;
bool startSystemServer = false;
bool application = false;
const char* parentDir = NULL;
const char* niceName = NULL;
const char* className = NULL;
while (i < argc) {
const char* arg = argv[i++];
if (!parentDir) {
parentDir = arg;
} else if (strcmp(arg, "--zygote") == 0) {
zygote = true;
niceName = "zygote";
} else if (strcmp(arg, "--start-system-server") == 0) {
startSystemServer = true;
} else if (strcmp(arg, "--application") == 0) {
application = true;
} else if (strncmp(arg, "--nice-name=", 12) == 0) {
niceName = arg + 12;
} else {
className = arg;
break;
}
}
if (niceName && *niceName) {
setArgv0(argv0, niceName);
set_process_name(niceName);
}
runtime.mParentDir = parentDir;
if (zygote) {
runtime.start("com.android.internal.os.ZygoteInit",
startSystemServer ? "start-system-server" : "");
} else if (className) {
// Remainder of args get passed to startup class main()
runtime.mClassName = className;
runtime.mArgC = argc - i;
runtime.mArgV = argv + i;
runtime.start("com.android.internal.os.RuntimeInit",
application ? "application" : "tool");
} else {
fprintf(stderr, "Error: no class name or --zygote supplied.\n");
app_usage();
LOG_ALWAYS_FATAL("app_process: no class name or --zygote supplied.");
return 10;
}
}
main函数首先分析参数,决定下一步要做什么:
while (i < argc) {
const char* arg = argv[i++];
if (!parentDir) {
parentDir = arg;
} else if (strcmp(arg, "--zygote") == 0) {
zygote = true;
niceName = "zygote";
} else if (strcmp(arg, "--start-system-server") == 0) {
startSystemServer = true;
} else if (strcmp(arg, "--application") == 0) {
application = true;
} else if (strncmp(arg, "--nice-name=", 12) == 0) {
niceName = arg + 12;
} else {
className = arg;
break;
}
}
上一步中,我们知道Init进程启动Zygote进程的时候,包含了--zygote参数和--start-system-server参数,所以,现在zygote和startSystemServer都为true,所以接下来执行:
if (zygote) {
runtime.start("com.android.internal.os.ZygoteInit",
startSystemServer ? "start-system-server" : "");
} else if (className) {
......
} else {
......
}
这次的参数分别为“com.android.internal.os.ZygoteInit”和“start-system-server”。
AndroidRunTime
runtime的类型为AppRunTime,AppRumTime的定义(frameworks/base/cmds/app_process/app_main.cpp):
class AppRuntime : public AndroidRuntime
{
public:
AppRuntime()
: mParentDir(NULL)
, mClassName(NULL)
, mClass(NULL)
, mArgC(0)
, mArgV(NULL)
{
}
#if 0
// this appears to be unused
const char* getParentDir() const
{
return mParentDir;
}
#endif
const char* getClassName() const
{
return mClassName;
}
virtual void onVmCreated(JNIEnv* env)
{
if (mClassName == NULL) {
return; // Zygote. Nothing to do here.
}
/*
* This is a little awkward because the JNI FindClass call uses the
* class loader associated with the native method we're executing in.
* If called in onStarted (from RuntimeInit.finishInit because we're
* launching "am", for example), FindClass would see that we're calling
* from a boot class' native method, and so wouldn't look for the class
* we're trying to look up in CLASSPATH. Unfortunately it needs to,
* because the "am" classes are not boot classes.
*
* The easiest fix is to call FindClass here, early on before we start
* executing boot class Java code and thereby deny ourselves access to
* non-boot classes.
*/
char* slashClassName = toSlashClassName(mClassName);
mClass = env->FindClass(slashClassName);
if (mClass == NULL) {
ALOGE("ERROR: could not find class '%s'\n", mClassName);
}
free(slashClassName);
mClass = reinterpret_cast<jclass>(env->NewGlobalRef(mClass));
}
virtual void onStarted()
{
sp<ProcessState> proc = ProcessState::self();
ALOGV("App process: starting thread pool.\n");
proc->startThreadPool();
AndroidRuntime* ar = AndroidRuntime::getRuntime();
ar->callMain(mClassName, mClass, mArgC, mArgV);
IPCThreadState::self()->stopProcess();
}
virtual void onZygoteInit()
{
sp<ProcessState> proc = ProcessState::self();
ALOGV("App process: starting thread pool.\n");
proc->startThreadPool();
}
virtual void onExit(int code)
{
if (mClassName == NULL) {
// if zygote
IPCThreadState::self()->stopProcess();
}
AndroidRuntime::onExit(code);
}
const char* mParentDir;
const char* mClassName;
jclass mClass;
int mArgC;
const char* const* mArgV;
};
}
可以看到,AppRuntime并没有重载start函数,所以,目前会执行其父类AndroidRuntime的start函数(frameworks/base/core/jni/AndroidRunTime.cpp):
/*
* Start the Android runtime. This involves starting the virtual machine
* and calling the "static void main(String[] args)" method in the class
* named by "className".
*
* Passes the main function two arguments, the class name and the specified
* options string.
*/
void AndroidRuntime::start(const char* className, const char* options)
{
ALOGD("\n>>>>>> AndroidRuntime START %s <<<<<<\n",
className != NULL ? className : "(unknown)");
blockSigpipe();
/*
* 'startSystemServer == true' means runtime is obsolete and not run from
* init.rc anymore, so we print out the boot start event here.
*/
if (strcmp(options, "start-system-server") == 0) {
/* track our progress through the boot sequence */
const int LOG_BOOT_PROGRESS_START = 3000;
LOG_EVENT_LONG(LOG_BOOT_PROGRESS_START,
ns2ms(systemTime(SYSTEM_TIME_MONOTONIC)));
}
const char* rootDir = getenv("ANDROID_ROOT");
if (rootDir == NULL) {
rootDir = "/system";
if (!hasDir("/system")) {
LOG_FATAL("No root directory specified, and /android does not exist.");
return;
}
setenv("ANDROID_ROOT", rootDir, 1);
}
//const char* kernelHack = getenv("LD_ASSUME_KERNEL");
//ALOGD("Found LD_ASSUME_KERNEL='%s'\n", kernelHack);
/* start the virtual machine */
JNIEnv* env;
if (startVm(&mJavaVM, &env) != 0) {
return;
}
onVmCreated(env);
/*
* Register android functions.
*/
if (startReg(env) < 0) {
ALOGE("Unable to register all android natives\n");
return;
}
/*
* We want to call main() with a String array with arguments in it.
* At present we have two arguments, the class name and an option string.
* Create an array to hold them.
*/
jclass stringClass;
jobjectArray strArray;
jstring classNameStr;
jstring optionsStr;
stringClass = env->FindClass("java/lang/String");
assert(stringClass != NULL);
strArray = env->NewObjectArray(2, stringClass, NULL);
assert(strArray != NULL);
classNameStr = env->NewStringUTF(className);
assert(classNameStr != NULL);
env->SetObjectArrayElement(strArray, 0, classNameStr);
optionsStr = env->NewStringUTF(options);
env->SetObjectArrayElement(strArray, 1, optionsStr);
/*
* Start VM. This thread becomes the main thread of the VM, and will
* not return until the VM exits.
*/
char* slashClassName = toSlashClassName(className);
jclass startClass = env->FindClass(slashClassName);
if (startClass == NULL) {
ALOGE("JavaVM unable to locate class '%s'\n", slashClassName);
/* keep going */
} else {
jmethodID startMeth = env->GetStaticMethodID(startClass, "main",
"([Ljava/lang/String;)V");
if (startMeth == NULL) {
ALOGE("JavaVM unable to find main() in '%s'\n", className);
/* keep going */
} else {
env->CallStaticVoidMethod(startClass, startMeth, strArray);
#if 0
if (env->ExceptionCheck())
threadExitUncaughtException(env);
#endif
}
}
free(slashClassName);
ALOGD("Shutting down VM\n");
if (mJavaVM->DetachCurrentThread() != JNI_OK)
ALOGW("Warning: unable to detach main thread\n");
if (mJavaVM->DestroyJavaVM() != 0)
ALOGW("Warning: VM did not shut down cleanly\n");
}
AndroidRumTime.start主要做了三项工作:
- startVM启动Android虚拟机,并呼叫onVMCreate函数。前面,我们有看到AppRuntime有重载onVMCreated函数(不过,对于app_process作为Zygote进程启动的情况,不做任何处理就返回)
- startReg注册需要的Native函数,基本Android的每个模块都有一些native实现需要和Java代码关联起来,事先注册能够提高性能,(另一种方案是第一次调用的时候查找实现函数并完成注册)
- 查找className指定的class的main函数,并以options为参数,调用main函数。这里不对代码做解释,用java写过反射调用的人应该基本能看懂。
runtime.start最后会调用ZygoteIni的.main函数,参数为--start-system-server。
因为已经启动了java虚拟机,接下来可以执行Java代码了,所以,我们将进入下一个世界。
ZygoteInit
ZygoteInit从类名看,从现在开始是真正的“Zygote”(frameworks/base/core/java/com/android/internal/os/ZygoteInit.java):
public static void main(String argv[]) {
try {
// Start profiling the zygote initialization.
SamplingProfilerIntegration.start();
registerZygoteSocket();
EventLog.writeEvent(LOG_BOOT_PROGRESS_PRELOAD_START,
SystemClock.uptimeMillis());
preload();
EventLog.writeEvent(LOG_BOOT_PROGRESS_PRELOAD_END,
SystemClock.uptimeMillis());
// Finish profiling the zygote initialization.
SamplingProfilerIntegration.writeZygoteSnapshot();
// Do an initial gc to clean up after startup
gc();
// If requested, start system server directly from Zygote
if (argv.length != 2) {
throw new RuntimeException(argv[0] + USAGE_STRING);
}
if (argv[1].equals("start-system-server")) {
startSystemServer();
} else if (!argv[1].equals("")) {
throw new RuntimeException(argv[0] + USAGE_STRING);
}
Log.i(TAG, "Accepting command socket connections");
if (ZYGOTE_FORK_MODE) {
runForkMode();
} else {
runSelectLoopMode();
}
closeServerSocket();
} catch (MethodAndArgsCaller caller) {
caller.run();
} catch (RuntimeException ex) {
Log.e(TAG, "Zygote died with exception", ex);
closeServerSocket();
throw ex;
}
}
从代码看,main函数完成了四项工作:
- registerZygoteSocket
- preload
- startSystemServer,因为之前AndroidRuntime是以“start-system-server”为参数调用main函数的,所以这里会执行startSystemServer
- runSelectLoopMode,ZYGOTE_FORK_MODE常量恒定为false,所以会执行runSelectLoopMode
先看一段“客户端“的代码(frameworks/base/core/java/android/os/Process.java):
public static final ProcessStartResult start(final String processClass,
final String niceName,
int uid, int gid, int[] gids,
int debugFlags, int mountExternal,
int targetSdkVersion,
String seInfo,
String[] zygoteArgs) {
try {
return startViaZygote(processClass, niceName, uid, gid, gids,
debugFlags, mountExternal, targetSdkVersion, seInfo, zygoteArgs);
} catch (ZygoteStartFailedEx ex) {
Log.e(LOG_TAG,
"Starting VM process through Zygote failed");
throw new RuntimeException(
"Starting VM process through Zygote failed", ex);
}
}
private static void openZygoteSocketIfNeeded()
throws ZygoteStartFailedEx {
int retryCount;
if (sPreviousZygoteOpenFailed) {
/*
* If we've failed before, expect that we'll fail again and
* don't pause for retries.
*/
retryCount = 0;
} else {
retryCount = 10;
}
/*
* See bug #811181: Sometimes runtime can make it up before zygote.
* Really, we'd like to do something better to avoid this condition,
* but for now just wait a bit...
*/
for (int retry = 0
; (sZygoteSocket == null) && (retry < (retryCount + 1))
; retry++ ) {
if (retry > 0) {
try {
Log.i("Zygote", "Zygote not up yet, sleeping...");
Thread.sleep(ZYGOTE_RETRY_MILLIS);
} catch (InterruptedException ex) {
// should never happen
}
}
try {
sZygoteSocket = new LocalSocket();
sZygoteSocket.connect(new LocalSocketAddress(ZYGOTE_SOCKET,
LocalSocketAddress.Namespace.RESERVED));
sZygoteInputStream
= new DataInputStream(sZygoteSocket.getInputStream());
sZygoteWriter =
new BufferedWriter(
new OutputStreamWriter(
sZygoteSocket.getOutputStream()),
256);
Log.i("Zygote", "Process: zygote socket opened");
sPreviousZygoteOpenFailed = false;
break;
} catch (IOException ex) {
if (sZygoteSocket != null) {
try {
sZygoteSocket.close();
} catch (IOException ex2) {
Log.e(LOG_TAG,"I/O exception on close after exception",
ex2);
}
}
sZygoteSocket = null;
}
}
if (sZygoteSocket == null) {
sPreviousZygoteOpenFailed = true;
throw new ZygoteStartFailedEx("connect failed");
}
}
private static ProcessStartResult zygoteSendArgsAndGetResult(ArrayList<String> args)
throws ZygoteStartFailedEx {
openZygoteSocketIfNeeded();
try {
sZygoteWriter.write(Integer.toString(args.size()));
sZygoteWriter.newLine();
int sz = args.size();
for (int i = 0; i < sz; i++) {
String arg = args.get(i);
if (arg.indexOf('\n') >= 0) {
throw new ZygoteStartFailedEx(
"embedded newlines not allowed");
}
sZygoteWriter.write(arg);
sZygoteWriter.newLine();
}
sZygoteWriter.flush();
// Should there be a timeout on this?
ProcessStartResult result = new ProcessStartResult();
result.pid = sZygoteInputStream.readInt();
if (result.pid < 0) {
throw new ZygoteStartFailedEx("fork() failed");
}
result.usingWrapper = sZygoteInputStream.readBoolean();
return result;
} catch (IOException ex) {
try {
if (sZygoteSocket != null) {
sZygoteSocket.close();
}
} catch (IOException ex2) {
// we're going to fail anyway
Log.e(LOG_TAG,"I/O exception on routine close", ex2);
}
sZygoteSocket = null;
throw new ZygoteStartFailedEx(ex);
}
}
这段代码是framework层通知Zygote进程进行fork用的,代码略长,也没有必要仔细分析,只要看明白一点即可:
framework层会通过Unix domain socket连接到Zygote进程,并发送字符串型的参数。
registerZygoteSocket
刚才有看到Process会通过domain socket连接到Zygote进程,那Zygote进程理所当然的要监听这个socket端口了,这就是registerZygoteSocket函数要做的(或者说它做了一半):
/**
* Registers a server socket for zygote command connections
*
* @throws RuntimeException when open fails
*/
private static void registerZygoteSocket() {
if (sServerSocket == null) {
int fileDesc;
try {
String env = System.getenv(ANDROID_SOCKET_ENV);
fileDesc = Integer.parseInt(env);
} catch (RuntimeException ex) {
throw new RuntimeException(
ANDROID_SOCKET_ENV + " unset or invalid", ex);
}
try {
sServerSocket = new LocalServerSocket(
createFileDescriptor(fileDesc));
} catch (IOException ex) {
throw new RuntimeException(
"Error binding to local socket '" + fileDesc + "'", ex);
}
}
}
registerZygoteSocket函数中,先从
private static final String ANDROID_SOCKET_ENV = "ANDROID_SOCKET_zygote";
环境变量中,读取一个int值作为文件描述父,创建文件描述符,为什么呢?
如果还记得init.rc的配置到话,一切就可以解释了:
socket zygote stream 660 root system
init进程在处理这一行属性时,会先在dev/socket下创建一个名为zygote的设备文件,然后打开这个文件,并把文件描述符保存到环境变量中。环境变量名的规则为"ANDROID_SOCKET_"+第一参数(这里为zygote)。所以,app_main就可以通过指定的环境变量来获取这个文件描述符,并由此创建LocalServerSocket。
preload
因为所有的Android进程都是从Zygote进程fork出来的,而子进程会继承Zygote的资源。换言之,如果Zygote进程持有了资源,所有android进程就都有了。所以,Zygote会预加载一些系统资源,以加速子进程的启动速度(子进程以写时复制的机制进行资源共享,所以不必担心内存占用问题。另外Android系统的启动时间可能有大概40%的时间耗费在这个preload函数上,但是从整体上来说,这是利大于弊的)。
static void preload() {
preloadClasses();//加载java类
preloadResources();//加载icon,字串等资源
}
startSystemServer
Zygote进程启动以后,会由Zygote进程fork出SystemServer,继续启动系统:
/**
* Prepare the arguments and fork for the system server process.
*/
private static boolean startSystemServer()
throws MethodAndArgsCaller, RuntimeException {
/* Hardcoded command line to start the system server */
String args[] = {
"--setuid=1000",
"--setgid=1000",
"--setgroups=1001,1002,1003,1004,1005,1006,1007,1008,1009,1010,1018,3001,3002,3003,3006,3007",
"--capabilities=130104352,130104352",
"--runtime-init",
"--nice-name=system_server",
"com.android.server.SystemServer",
};
ZygoteConnection.Arguments parsedArgs = null;
int pid;
try {
parsedArgs = new ZygoteConnection.Arguments(args);
ZygoteConnection.applyDebuggerSystemProperty(parsedArgs);
ZygoteConnection.applyInvokeWithSystemProperty(parsedArgs);
/* Request to fork the system server process */
pid = Zygote.forkSystemServer(
parsedArgs.uid, parsedArgs.gid,
parsedArgs.gids,
parsedArgs.debugFlags,
null,
parsedArgs.permittedCapabilities,
parsedArgs.effectiveCapabilities);
} catch (IllegalArgumentException ex) {
throw new RuntimeException(ex);
}
/* For child process */
if (pid == 0) {
handleSystemServerProcess(parsedArgs);
}
return true;
}
首先,确定SystemServer的启动参数,然后,调用Zygote.forkSysetmServer函数,开始执行fork操作。这一块和SystemServer的启动过程比较相关,留待后面的文章分析。暂时只需要记住SystemServer是在这个位置启动的就好,其他的可以暂时忽略。
runSelectLoopMode
前面,我们已经得到LocalServerLocket了,现在需要开始监听LocalServerScoket了:
/**
* Runs the zygote process's select loop. Accepts new connections as
* they happen, and reads commands from connections one spawn-request's
* worth at a time.
*
* @throws MethodAndArgsCaller in a child process when a main() should
* be executed.
*/
private static void runSelectLoopMode() throws MethodAndArgsCaller {
ArrayList<FileDescriptor> fds = new ArrayList();
ArrayList<ZygoteConnection> peers = new ArrayList();
FileDescriptor[] fdArray = new FileDescriptor[4];
fds.add(sServerSocket.getFileDescriptor());
peers.add(null);
int loopCount = GC_LOOP_COUNT;
while (true) {
int index;
/*
* Call gc() before we block in select().
* It's work that has to be done anyway, and it's better
* to avoid making every child do it. It will also
* madvise() any free memory as a side-effect.
*
* Don't call it every time, because walking the entire
* heap is a lot of overhead to free a few hundred bytes.
*/
if (loopCount <= 0) {
gc();
loopCount = GC_LOOP_COUNT;
} else {
loopCount--;
}
try {
fdArray = fds.toArray(fdArray);
index = selectReadable(fdArray);
} catch (IOException ex) {
throw new RuntimeException("Error in select()", ex);
}
if (index < 0) {
throw new RuntimeException("Error in select()");
} else if (index == 0) {//index=0 为监听端口
ZygoteConnection newPeer = acceptCommandPeer();
peers.add(newPeer);
fds.add(newPeer.getFileDesciptor());
} else {
boolean done;
done = peers.get(index).runOnce();
if (done) {
peers.remove(index);
fds.remove(index);
}
}
}
}
其中,selectReadable函数为native函数,实现了一个多连接的等待功能:
static jint com_android_internal_os_ZygoteInit_selectReadable (
JNIEnv *env, jobject clazz, jobjectArray fds)
{
if (fds == NULL) {
jniThrowNullPointerException(env, "fds == null");
return -1;
}
jsize length = env->GetArrayLength(fds);
fd_set fdset;//fdset为连接集合
if (env->ExceptionOccurred() != NULL) {
return -1;
}
FD_ZERO(&fdset);//清空fdset
int nfds = 0;
for (jsize i = 0; i < length; i++) {//把所有的连接保存到fdset中
jobject fdObj = env->GetObjectArrayElement(fds, i);
if (env->ExceptionOccurred() != NULL) {
return -1;
}
if (fdObj == NULL) {
continue;
}
int fd = jniGetFDFromFileDescriptor(env, fdObj);
if (env->ExceptionOccurred() != NULL) {
return -1;
}
FD_SET(fd, &fdset);//保存fd到fdset中
if (fd >= nfds) {
nfds = fd + 1;
}
}
int err;
do {
err = select (nfds, &fdset, NULL, NULL, NULL);//fdset中任何一个连接有数据可读,则返回该连接在fdset的序号,否则等待
} while (err < 0 && errno == EINTR);
if (err < 0) {
jniThrowIOException(env, errno);
return -1;
}
for (jsize i = 0; i < length; i++) {
jobject fdObj = env->GetObjectArrayElement(fds, i);
if (env->ExceptionOccurred() != NULL) {
return -1;
}
if (fdObj == NULL) {
continue;
}
int fd = jniGetFDFromFileDescriptor(env, fdObj);
if (env->ExceptionOccurred() != NULL) {
return -1;
}
if (FD_ISSET(fd, &fdset)) {
return (jint)i;//返回有数据可读到连接的序号
}
}
return -1;
}
有数据可读的连接的序号为0,则为监听端口,应该accept这个连接:
/**
* Waits for and accepts a single command connection. Throws
* RuntimeException on failure.
*/
private static ZygoteConnection acceptCommandPeer() {
try {
return new ZygoteConnection(sServerSocket.accept());
} catch (IOException ex) {
throw new RuntimeException(
"IOException during accept()", ex);
}
}
有数据可读的连接的序号不为0,则说明有fork指令过来,执行ZygoteConnection.runOnce(),并视需要移除这个连接。这个过程和App进程的启动相关,之后的文章会做进一步分析。
总结
- Zygote进程由Init进程启动
- Zygote进程的实体为app_process
- Zygote初始化Android虚拟机以后,开始执行Java类ZygoteInit
- ZygoteInit会启动SystemServer
- ZygoteInit会监听domain socket “dev/socket/zygote”,以执行framework层指定的fork操作