1、启动相关类
2、启动时序
3、启动分析
3.1 启动流程做了什么
- 解析配置,包含配置文件及命令行参数
- 检查内部和外部参数,包含JVM参数、操作系统内核参数等
- 初始化内部资源,创建内部模块
- 启动Node和keepalive线程
3.2 启动流程分析
3.2.1 解析配置
支持的配置有
参数 | 含义 |
-h, --help | 打印帮助信息 |
-s, --silent | 终端输出最少信息 |
-v, --verbose | 终端输出详细信息 |
-E | 设置配置项。es.path.conf必须配置。未显式配置时:path.data默认使用es.path.data;path.home默认使用es.path.home;path.logs默认使用es.path.logs |
-V, --version | 输出es版本信息 |
-d, --daemonize | 后台启动es |
-p, --pidfile | 启动时在特定目录下创建pid文件,其中保存了当前进程的pid,之后可以通过查看这个pid文件来关闭进程 |
-q, --quiet | 控制台关闭标准输出/错误流 |
3.2.2 创建初始环境
通过EnvironmentAwareCommand#createEnv来完成
/**
 * Creates the initial {@link Environment} for a command.
 *
 * <p>The configuration directory is taken from the {@code es.path.conf} system property and is
 * mandatory; the remaining work is delegated to
 * {@code InternalSettingsPreparer.prepareEnvironment}.
 *
 * @param baseSettings settings to start from
 * @param settings     additional key/value settings passed on to the preparer
 * @return the prepared environment
 * @throws UserException with exit code {@code ExitCodes.CONFIG} when {@code es.path.conf} is not set
 */
protected final Environment createEnv(final Settings baseSettings, final Map<String, String> settings) throws UserException {
    final String configDirProperty = System.getProperty("es.path.conf");
    if (configDirProperty != null) {
        return InternalSettingsPreparer.prepareEnvironment(
            baseSettings,
            settings,
            getConfigPath(configDirProperty),
            // HOSTNAME is set by elasticsearch-env and elasticsearch-env.bat so it is always available
            () -> System.getenv("HOSTNAME"));
    }
    throw new UserException(ExitCodes.CONFIG, "the system property [es.path.conf] must be set");
}
首先获取系统属性es.path.conf,然后通过InternalSettingsPreparer.prepareEnvironment来创建环境
/**
 * Builds the {@link Environment} in two passes.
 *
 * <p>The first pass only initializes enough settings to locate the config directory. The legacy
 * {@code elasticsearch.yaml} / {@code elasticsearch.json} files are rejected, then
 * {@code elasticsearch.yml} (if present) is loaded into a fresh builder, the settings are
 * initialized again on top of it, finalized, and used to build the returned environment.
 *
 * @param input           base settings
 * @param properties      additional key/value settings
 * @param configPath      path handed to the {@link Environment} constructor as the config location
 * @param defaultNodeName supplier passed to {@code finalizeSettings}
 * @return the environment built from the fully prepared settings
 * @throws SettingsException if a legacy config file exists or {@code elasticsearch.yml} cannot be loaded
 */
public static Environment prepareEnvironment(Settings input, Map<String, String> properties,
                                             Path configPath, Supplier<String> defaultNodeName) {
    // first pass: just enough settings to build an environment and learn the config dir
    final Settings.Builder bootstrapBuilder = Settings.builder();
    initializeSettings(bootstrapBuilder, input, properties);
    final Environment bootstrapEnvironment = new Environment(bootstrapBuilder.build(), configPath);
    final Path configDir = bootstrapEnvironment.configFile();

    if (Files.exists(configDir.resolve("elasticsearch.yaml"))) {
        throw new SettingsException("elasticsearch.yaml was deprecated in 5.5.0 and must be renamed to elasticsearch.yml");
    }
    if (Files.exists(configDir.resolve("elasticsearch.json"))) {
        throw new SettingsException("elasticsearch.json was deprecated in 5.5.0 and must be converted to elasticsearch.yml");
    }

    // second pass: start from a fresh builder seeded with the config file, if there is one
    final Settings.Builder finalBuilder = Settings.builder();
    final Path ymlFile = configDir.resolve("elasticsearch.yml");
    if (Files.exists(ymlFile)) {
        try {
            finalBuilder.loadFromPath(ymlFile);
        } catch (IOException e) {
            throw new SettingsException("Failed to load settings from " + ymlFile.toString(), e);
        }
    }

    // re-apply the input settings and properties now that the config file has been loaded
    initializeSettings(finalBuilder, input, properties);
    finalizeSettings(finalBuilder, defaultNodeName);
    return new Environment(finalBuilder.build(), configPath);
}
在创建环境过程中,获取settings中的 path.home。
如果configPath不为空就用configPath,否则使用path.home对应文件中的config。
java.io.tmpdir对应的临时文件目录不能为空,对应的是tmpFile。
从home路径下解析plugins目录得到 pluginsFile
dataFiles从path.data或者home文件中的data得到。
sharedDataFile从settings中的path.shared_data得到
repoFiles从settings中的path.repo得到
logsFile从settings中的path.logs或者home文件中的logs得到
pidFile从settings中的node.pidfile得到
binFile, libFile,modulesFile分别从homeFile中的bin,lib,modules得到
最后统一放到Environment的settings中。
/**
 * Resolves the node's directories from {@code settings} and stores a normalized copy of the
 * settings.
 *
 * <p>Configured paths are made absolute and normalized. Directories that are not configured fall
 * back to sub-directories of the home directory ({@code config}, {@code data}, {@code logs});
 * {@code plugins}, {@code bin}, {@code lib} and {@code modules} are always resolved against home.
 * The resolved values are written back into the stored settings only for keys that were
 * explicitly present (home and logs are always written).
 *
 * @param settings   source settings; the home path setting must exist
 * @param configPath config directory, or {@code null} to use {@code <home>/config}
 * @param tmpPath    temporary directory; must not be {@code null}
 * @throws IllegalStateException if the home path setting is not configured
 * @throws NullPointerException  if {@code tmpPath} is {@code null}
 */
Environment(final Settings settings, final Path configPath, final Path tmpPath) {
    if (PATH_HOME_SETTING.exists(settings) == false) {
        throw new IllegalStateException(PATH_HOME_SETTING.getKey() + " is not configured");
    }
    final Path homeFile = PathUtils.get(PATH_HOME_SETTING.get(settings)).toAbsolutePath().normalize();

    configFile = configPath != null ? configPath.toAbsolutePath().normalize() : homeFile.resolve("config");
    tmpFile = Objects.requireNonNull(tmpPath);
    pluginsFile = homeFile.resolve("plugins");

    final List<String> configuredDataPaths = PATH_DATA_SETTING.get(settings);
    if (configuredDataPaths.isEmpty()) {
        dataFiles = new Path[]{homeFile.resolve("data")};
    } else {
        dataFiles = configuredDataPaths.stream()
            .map(dataPath -> PathUtils.get(dataPath).toAbsolutePath().normalize())
            .toArray(Path[]::new);
    }

    sharedDataFile = PATH_SHARED_DATA_SETTING.exists(settings)
        ? PathUtils.get(PATH_SHARED_DATA_SETTING.get(settings)).toAbsolutePath().normalize()
        : null;

    final List<String> configuredRepoPaths = PATH_REPO_SETTING.get(settings);
    if (configuredRepoPaths.isEmpty()) {
        repoFiles = EMPTY_PATH_ARRAY;
    } else {
        repoFiles = configuredRepoPaths.stream()
            .map(repoPath -> PathUtils.get(repoPath).toAbsolutePath().normalize())
            .toArray(Path[]::new);
    }

    // this is trappy, Setting#get(Settings) will get a fallback setting yet return false for Settings#exists(Settings)
    logsFile = PATH_LOGS_SETTING.exists(settings)
        ? PathUtils.get(PATH_LOGS_SETTING.get(settings)).toAbsolutePath().normalize()
        : homeFile.resolve("logs");

    pidFile = NODE_PIDFILE_SETTING.exists(settings)
        ? PathUtils.get(NODE_PIDFILE_SETTING.get(settings)).toAbsolutePath().normalize()
        : null;

    binFile = homeFile.resolve("bin");
    libFile = homeFile.resolve("lib");
    modulesFile = homeFile.resolve("modules");

    // write the resolved paths back so the stored settings carry the normalized values
    final Settings.Builder resolved = Settings.builder().put(settings);
    if (PATH_DATA_SETTING.exists(settings)) {
        final List<String> dataAsStrings = Arrays.stream(dataFiles).map(Path::toString).collect(Collectors.toList());
        resolved.putList(PATH_DATA_SETTING.getKey(), dataAsStrings);
    }
    resolved.put(PATH_HOME_SETTING.getKey(), homeFile);
    resolved.put(PATH_LOGS_SETTING.getKey(), logsFile.toString());
    if (PATH_REPO_SETTING.exists(settings)) {
        final List<String> reposAsStrings = Arrays.stream(repoFiles).map(Path::toString).collect(Collectors.toList());
        resolved.putList(Environment.PATH_REPO_SETTING.getKey(), reposAsStrings);
    }
    if (PATH_SHARED_DATA_SETTING.exists(settings)) {
        assert sharedDataFile != null;
        resolved.put(Environment.PATH_SHARED_DATA_SETTING.getKey(), sharedDataFile.toString());
    }
    if (NODE_PIDFILE_SETTING.exists(settings)) {
        assert pidFile != null;
        resolved.put(Environment.NODE_PIDFILE_SETTING.getKey(), pidFile.toString());
    }
    this.settings = resolved.build();
}
3.2.3 加载安全配置
调用Bootstrap.loadSecureSettings。
从初始环境中configFile对应目录下的elasticsearch.keystore文件中加载KeyStore。
/**
 * Loads the secure settings from the keystore in the config directory of {@code initialEnv}.
 *
 * <p>If a keystore was loaded and it has a password, the passphrase is read from {@code stdin};
 * otherwise an empty password is used. When no keystore was loaded, a new one is created and
 * saved with an empty password. An existing keystore is decrypted and upgraded.
 *
 * @param initialEnv environment whose config directory holds the keystore
 * @param stdin      stream the passphrase is read from when the keystore has a password
 * @return the newly created or the decrypted keystore
 * @throws BootstrapException wrapping any failure while loading, reading the passphrase,
 *                            creating, decrypting or upgrading the keystore
 */
static SecureSettings loadSecureSettings(Environment initialEnv, InputStream stdin) throws BootstrapException {
    final KeyStoreWrapper keystore;
    final SecureString password;
    try {
        keystore = KeyStoreWrapper.load(initialEnv.configFile());
        password = (keystore != null && keystore.hasPassword())
            ? readPassphrase(stdin, KeyStoreAwareCommand.MAX_PASSPHRASE_LENGTH)
            : new SecureString(new char[0]);
    } catch (IOException e) {
        throw new BootstrapException(e);
    }

    // the password is closed when this block exits, on success or failure
    try (password) {
        if (keystore != null) {
            keystore.decrypt(password.getChars());
            KeyStoreWrapper.upgrade(keystore, initialEnv.configFile(), password.getChars());
            return keystore;
        }
        final KeyStoreWrapper created = KeyStoreWrapper.create();
        created.save(initialEnv.configFile(), new char[0]);
        return created;
    } catch (Exception e) {
        throw new BootstrapException(e);
    }
}
3.2.4 检查内部环境
检查Lucene版本,看是否对应得上。
检查Jar冲突,通过JarHell.
/**
 * Scans the given classpath entries for duplicate jars and duplicate classes.
 *
 * <p>Entries located under {@code java.home} are skipped. A {@code .jar} entry that appears twice
 * is rejected; otherwise its manifest (if any) is passed to {@code checkManifest} and every
 * {@code .class} entry is passed to {@code checkClass}. Any other entry is treated as a directory
 * of class files and walked, if it exists.
 *
 * @param urls   classpath entries to examine
 * @param output receives progress messages
 * @throws IllegalStateException if the same jar path appears more than once
 * @throws URISyntaxException    if a URL cannot be converted to a URI
 * @throws IOException           if a jar or directory cannot be read
 */
public static void checkJarHell(Set<URL> urls, Consumer<String> output) throws URISyntaxException, IOException {
    // we don't try to be sneaky and use deprecated/internal/not portable stuff
    // like sun.boot.class.path, and with jigsaw we don't yet have a way to get
    // a "list" at all. So just exclude any elements underneath the java home
    final String javaHome = System.getProperty("java.home");
    output.accept("java.home: " + javaHome);

    final Map<String, Path> classesSeen = new HashMap<>(32768);
    final Set<Path> jarsSeen = new HashSet<>();

    for (final URL url : urls) {
        final Path path = PathUtils.get(url.toURI());

        // exclude system resources
        if (path.startsWith(javaHome)) {
            output.accept("excluding system resource: " + path);
            continue;
        }

        if (path.toString().endsWith(".jar") == false) {
            output.accept("examining directory: " + path);
            // case for tests: where we have class files in the classpath
            final Path root = PathUtils.get(url.toURI());
            final String sep = root.getFileSystem().getSeparator();
            // don't try and walk class or resource directories that don't exist
            // gradle will add these to the classpath even if they never get created
            if (Files.exists(root) == false) {
                continue;
            }
            Files.walkFileTree(root, new SimpleFileVisitor<Path>() {
                @Override
                public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
                    final String relative = root.relativize(file).toString();
                    if (relative.endsWith(".class")) {
                        // normalize with the os separator, remove '.class'
                        final String className =
                            relative.replace(sep, ".").substring(0, relative.length() - ".class".length());
                        checkClass(classesSeen, className, path);
                    }
                    return super.visitFile(file, attrs);
                }
            });
            continue;
        }

        if (jarsSeen.add(path) == false) {
            throw new IllegalStateException("jar hell!" + System.lineSeparator() +
                "duplicate jar on classpath: " + path);
        }
        output.accept("examining jar: " + path);
        try (JarFile jar = new JarFile(path.toString())) {
            final Manifest manifest = jar.getManifest();
            if (manifest != null) {
                checkManifest(manifest, path);
            }
            // inspect entries
            for (Enumeration<JarEntry> entries = jar.entries(); entries.hasMoreElements(); ) {
                final String entryName = entries.nextElement().getName();
                if (entryName.endsWith(".class")) {
                    // for jar format, the separator is defined as /; drop the 6-char ".class" suffix
                    final String className = entryName.substring(0, entryName.length() - 6).replace('/', '.');
                    checkClass(classesSeen, className, path);
                }
            }
        }
    }
}
3.2.5 检查外部环境
通过BootstrapChecks#check方法来完成。
HeapSizeCheck:堆大小检查。如果JVM的初始堆大小-Xms与最大堆大小-Xmx值不同,在使用期间JVM堆大小调整时可能会出现停顿。如果开启了锁内存,JVM在启动时锁定堆的初始大小,如果初始堆大小与最大堆大小不同,在堆大小发生变化后,可能无法保证所有JVM堆都锁定在内存中。
FileDescriptorCheck:文件描述符的最大值不能小于65536(默认值),否则检查失败
MlockallCheck:内存锁定检查。如果开启bootstrap.memory_lock让es启动时锁定内存,在锁定失败时,检查失败。
MaxNumberOfThreadsCheck:最大线程数检查。如果线程数小于默认值4096,检查失败。
MaxSizeVirtualMemoryCheck:最大虚拟内存检查。如果getMaxSizeVirtualMemory() != getRlimInfinity(),其中getMaxSizeVirtualMemory()等于JNANatives.MAX_SIZE_VIRTUAL_MEMORY,getRlimInfinity()等于JNACLibrary.RLIM_INFINITY,检查失败。
MaxFileSizeCheck:最大文件大小检查。JNANatives.MAX_FILE_SIZE不等于JNACLibrary.RLIM_INFINITY,检查失败。
MaxMapCountCheck:最大映射数检查。在开启node.store.allow_mmap情况下,最大映射数小于1<<18时,检查失败。
ClientJvmCheck:jvm客户端配置检查。client JVM模式时,检查失败。
UseSerialGCCheck:串行收集检查。JVM使用串行收集器时,检查失败。
SystemCallFilterCheck:系统调用过滤器检查。在开启bootstrap.system_call_filter但是没有安装过滤器时,检查失败。
OnErrorCheck, OnOutOfMemoryErrorCheck:错误检查。如果启用了系统调用过滤器,并且配置了JVM参数OnError或OnOutOfMemoryError,则检查失败。
EarlyAccessCheck:如果JVM vendor为"Oracle Corporation"并且版本信息中包含-ea,检查失败。
G1GCCheck:G1收集器检查。java8版本使用G1收集器,检查失败。
AllPermissionCheck:所有权限检查。开启所有权限,检查失败。
DiscoveryConfiguredCheck:发现配置检查。如果discovery.type的配置为zen,并且discovery.seed_providers、discovery.seed_hosts、cluster.initial_master_nodes均未配置,则检查失败。
3.2.6 启动内部模块
创建Node,子模块在Node中创建。启动时调用子模块的start。
node的子模块图
/**
 * Starts this node and the services it obtains from the injector.
 *
 * <p>If {@code lifecycle.moveToStarted()} returns {@code false} the method returns immediately.
 * Otherwise the sequence is:
 * <ul>
 *   <li>the plugin lifecycle components and a series of services are started;</li>
 *   <li>the transport service is started;</li>
 *   <li>the on-disk metadata is loaded through {@code GatewayMetaState};</li>
 *   <li>the node is validated via {@code validateNodeBeforeAcceptingRequests};</li>
 *   <li>discovery and the cluster service are started;</li>
 *   <li>incoming transport requests are accepted and the initial join begins;</li>
 *   <li>the calling thread optionally blocks until a master is known or the initial-state
 *       timeout fires;</li>
 *   <li>the HTTP transport is started.</li>
 * </ul>
 *
 * @return this node
 * @throws NodeValidationException presumably raised by {@code validateNodeBeforeAcceptingRequests};
 *         confirm against that method
 */
public Node start() throws NodeValidationException {
// nothing to do when the lifecycle refuses the transition to "started"
if (lifecycle.moveToStarted() == false) {
return this;
}
logger.info("starting ...");
pluginLifecycleComponents.forEach(LifecycleComponent::start);
injector.getInstance(MappingUpdatedAction.class).setClient(client);
injector.getInstance(IndicesService.class).start();
injector.getInstance(IndicesClusterStateService.class).start();
injector.getInstance(SnapshotsService.class).start();
injector.getInstance(SnapshotShardsService.class).start();
injector.getInstance(RepositoriesService.class).start();
injector.getInstance(SearchService.class).start();
injector.getInstance(FsHealthService.class).start();
nodeService.getMonitorService().start();
final ClusterService clusterService = injector.getInstance(ClusterService.class);
final NodeConnectionsService nodeConnectionsService = injector.getInstance(NodeConnectionsService.class);
nodeConnectionsService.start();
clusterService.setNodeConnectionsService(nodeConnectionsService);
injector.getInstance(GatewayService.class).start();
Discovery discovery = injector.getInstance(Discovery.class);
// the master service publishes cluster states through the discovery module
clusterService.getMasterService().setClusterStatePublisher(discovery::publish);
// Start the transport service now so the publish address will be added to the local disco node in ClusterService
TransportService transportService = injector.getInstance(TransportService.class);
transportService.getTaskManager().setTaskResultsService(injector.getInstance(TaskResultsService.class));
transportService.getTaskManager().setTaskCancellationService(new TaskCancellationService(transportService));
transportService.start();
assert localNodeFactory.getNode() != null;
assert transportService.getLocalNode().equals(localNodeFactory.getNode())
: "transportService has a different local node than the factory provided";
injector.getInstance(PeerRecoverySourceService.class).start();
// Load (and maybe upgrade) the metadata stored on disk
final GatewayMetaState gatewayMetaState = injector.getInstance(GatewayMetaState.class);
gatewayMetaState.start(settings(), transportService, clusterService, injector.getInstance(MetaStateService.class),
injector.getInstance(IndexMetadataVerifier.class), injector.getInstance(MetadataUpgrader.class),
injector.getInstance(PersistedClusterStateService.class));
// consistency checks on the loaded state; executed only when Assertions.ENABLED is true
if (Assertions.ENABLED) {
try {
assert injector.getInstance(MetaStateService.class).loadFullState().v1().isEmpty();
final NodeMetadata nodeMetadata = NodeMetadata.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY,
nodeEnvironment.nodeDataPaths());
assert nodeMetadata != null;
assert nodeMetadata.nodeVersion().equals(Version.CURRENT);
assert nodeMetadata.nodeId().equals(localNodeFactory.getNode().getId());
} catch (IOException e) {
assert false : e;
}
}
// we load the global state here (the persistent part of the cluster state stored on disk) to
// pass it to the bootstrap checks to allow plugins to enforce certain preconditions based on the recovered state.
final Metadata onDiskMetadata = gatewayMetaState.getPersistedState().getLastAcceptedState().metadata();
assert onDiskMetadata != null : "metadata is null but shouldn't"; // this is never null
// the bootstrap checks contributed by all plugins are collected and passed to the validation
validateNodeBeforeAcceptingRequests(new BootstrapContext(environment, onDiskMetadata), transportService.boundAddress(),
pluginsService.filterPlugins(Plugin.class).stream()
.flatMap(p -> p.getBootstrapChecks().stream()).collect(Collectors.toList()));
clusterService.addStateApplier(transportService.getTaskManager());
// start after transport service so the local disco is known
discovery.start(); // start before cluster service so that it can set initial state on ClusterApplierService
clusterService.start();
assert clusterService.localNode().equals(localNodeFactory.getNode())
: "clusterService has a different local node than the factory provided";
transportService.acceptIncomingRequests();
discovery.startInitialJoin();
final TimeValue initialStateTimeout = INITIAL_STATE_TIMEOUT_SETTING.get(settings());
configureNodeAndClusterIdStateListener(clusterService);
// with a positive timeout and no known master, block until a master appears, the cluster service
// closes, or the timeout fires
if (initialStateTimeout.millis() > 0) {
final ThreadPool thread = injector.getInstance(ThreadPool.class);
ClusterState clusterState = clusterService.state();
ClusterStateObserver observer =
new ClusterStateObserver(clusterState, clusterService, null, logger, thread.getThreadContext());
if (clusterState.nodes().getMasterNodeId() == null) {
logger.debug("waiting to join the cluster. timeout [{}]", initialStateTimeout);
final CountDownLatch latch = new CountDownLatch(1);
// every listener callback releases the latch, so the await below always terminates
observer.waitForNextChange(new ClusterStateObserver.Listener() {
@Override
public void onNewClusterState(ClusterState state) {
latch.countDown();
}
@Override
public void onClusterServiceClose() {
latch.countDown();
}
@Override
public void onTimeout(TimeValue timeout) {
logger.warn("timed out while waiting for initial discovery state - timeout: {}",
initialStateTimeout);
latch.countDown();
}
}, state -> state.nodes().getMasterNodeId() != null, initialStateTimeout);
try {
latch.await();
} catch (InterruptedException e) {
// NOTE(review): the interrupt is rethrown as a timeout exception without the original cause and
// without restoring the thread's interrupt flag - confirm this is intended
throw new ElasticsearchTimeoutException("Interrupted while waiting for initial discovery state");
}
}
}
injector.getInstance(HttpServerTransport.class).start();
// optionally record the bound transport and http addresses via writePortsFile
if (WRITE_PORTS_FILE_SETTING.get(settings())) {
TransportService transport = injector.getInstance(TransportService.class);
writePortsFile("transport", transport.boundAddress());
HttpServerTransport http = injector.getInstance(HttpServerTransport.class);
writePortsFile("http", http.boundAddress());
}
logger.info("started");
// notify cluster plugins only after everything above has been started
pluginsService.filterPlugins(ClusterPlugin.class).forEach(ClusterPlugin::onNodeStarted);
return this;
}
3.2.7 启动keepalive线程
keepalive线程本身不做任何事,只是等待在CountDownLatch上。
// The keep-alive thread simply parks on the latch until it is released or interrupted.
keepAliveThread = new Thread(() -> {
    try {
        keepAliveLatch.await();
    } catch (InterruptedException e) {
        // bail out
    }
}, "elasticsearch[keepAlive/" + Version.CURRENT + "]");
// keep this thread alive (non daemon thread) until we shutdown
keepAliveThread.setDaemon(false);
// the shutdown hook releases the latch, which lets the keep-alive thread finish
Runtime.getRuntime().addShutdownHook(new Thread(() -> keepAliveLatch.countDown()));