一、预备知识 —— Poco::Util::ServerApplication
服务端的主类定义在 /programs/server/Server.h
,其继承关系如上图所示。可以看出服务端是基于 Poco::Util::ServerApplication 基类来实现的。ServerApplication 允许应用程序以 Unix 后台进程的方式运行,run()
为入口,首先调用 initialize()
进行初始化,然后执行程序主逻辑 main()
。
二、服务端启动
涉及服务端启动的主要源码路径如下,
programs/main.cpp
—— clickhouse应用程序入口
programs/server/Server.h(.cpp)
—— 服务端主类
src/Daemon/BaseDaemon.h(.cpp)
—— 后台进程主类
src/Server/*.h(.cpp)
—— 定义各种服务接口监听
2.1 服务端主程序入口
ClickHouse 的应用程序以 programs/main.cpp
作为主入口,其中注册了各个子应用的主程序。main()
函数根据命令行参数来选择应该调用的子应用程序。服务端主程序注册如下,
#if ENABLE_CLICKHOUSE_SERVER
/// Entry point of the server sub-application; declared only when the server is enabled in the build.
/// NOTE(review): per the accompanying article it returns the result of DB::Server::run(),
/// which starts the server process -- the definition is not shown here, confirm there.
int mainEntryClickHouseServer(int argc, char ** argv);
#endif
2.2 服务端初始化 —— BaseDaemon::initialize()
DB::Server::run()
首先执行 BaseDaemon::initialize()
来初始化后台进程。其中会解析命令行参数(--config-file),首次加载本地配置文件并将合并后的配置保存到 preprocessed_configs/config.xml
。
/// Initializes the daemon (excerpt; later steps are elided at the end).
/// Visible steps, in order: close file descriptors, run the base ServerApplication initialization,
/// copy command-line arguments into the configuration, change the working directory when running
/// as a daemon, load the configuration, apply the umask, save the preprocessed configuration,
/// set the core dump size limit, apply the configured time zone and derive the log directory.
/// Throws Poco::Exception when chdir, getrlimit or setenv fails.
void BaseDaemon::initialize(Application & self)
{
// 1. Close file descriptors left over from earlier (judging by the helper name; its body is not shown here).
closeFDs();
ServerApplication::initialize(self);
// 2. Copy the command-line arguments (e.g. "--config-file=/etc/clickhouse-server/config.xml")
//    into the configuration, at priority PRIO_APPLICATION - 100.
argsToConfig(argv(), config(), PRIO_APPLICATION - 100);
bool is_daemon = config().getBool("application.runAsDaemon", false);
if (is_daemon)
{
/** When creating pid file and looking for config, will search for paths relative to the working path of the program when started.
*/
// Strip the file name from "application.path" and make the remaining directory the working directory.
std::string path = fs::path(config().getString("application.path")).replace_filename("");
if (0 != chdir(path.c_str()))
throw Poco::Exception("Cannot change directory to " + path);
}
// 3. Load the local configuration files (config.xml, config.d/*.xml, conf.d/*.xml), including values
//    imported through the "incl" and "from_env" attributes.
//    NOTE(review): file list is taken from the article; reloadConfiguration() itself is not shown here.
reloadConfiguration();
/// This must be done before creation of any files (including logs).
// Default umask is 0027; a "umask" config entry, parsed as an octal number, overrides it.
mode_t umask_num = 0027;
if (config().has("umask"))
{
std::string umask_str = config().getString("umask");
std::stringstream stream; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
stream << umask_str;
stream >> std::oct >> umask_num;
}
umask(umask_num);
// 4. Save the merged (preprocessed) configuration.
//    NOTE(review): the article states it is written to "preprocessed_configs/config.xml".
ConfigProcessor(config_path).savePreprocessedConfig(loaded_config, "");
/// Write core dump on crash.
{
struct rlimit rlim;
if (getrlimit(RLIMIT_CORE, &rlim))
throw Poco::Exception("Cannot getrlimit");
/// 1 GiB by default. If more - it writes to disk too long.
rlim.rlim_cur = config().getUInt64("core_dump.size_limit", 1024 * 1024 * 1024);
// A failure to apply a non-zero limit is only reported on stderr; startup continues.
if (rlim.rlim_cur && setrlimit(RLIMIT_CORE, &rlim))
{
/// It doesn't work under address/thread sanitizer. http://lists.llvm.org/pipermail/llvm-bugs/2013-April/027880.html
std::cerr << "Cannot set max size of core file to " + std::to_string(rlim.rlim_cur) << std::endl;
}
}
// 5. Set up the time zone.
/// This must be done before any usage of DateLUT. In particular, before any logging.
if (config().has("timezone"))
{
const std::string config_timezone = config().getString("timezone");
if (0 != setenv("TZ", config_timezone.data(), 1)) // NOLINT(concurrency-mt-unsafe) // ok if not called concurrently with other setenv/getenv
throw Poco::Exception("Cannot setenv TZ variable");
tzset();
DateLUT::setDefaultTimezone(config_timezone);
}
// 6. Prepare logging: take the "logger.log" setting and, when present, keep only its directory part.
std::string log_path = config().getString("logger.log", "");
if (!log_path.empty())
log_path = fs::path(log_path).replace_filename("");
// ...
}
2.3 服务端主逻辑 —— Server::main()
DB::Server::run()
之后调用 Server::main()
进入服务端主逻辑。其中会注册各种核心功能,创建全局上下文、线程池,连接 ZooKeeper 服务,加载 ‘from_zk’ 配置项等。
/// Main logic of the server (excerpt; elided parts are marked with "// ...").
/// Visible steps, in order: register core components, create the global context, initialize the
/// global thread pool, reload the config with ZooKeeper includes, set up the data path and resource
/// limits, configure the uncompressed cache, create the config reloader, start the interserver
/// listeners, set up access control, install reload/stop/start callbacks, load metadata, create the
/// DDL worker, start the remaining servers and wait for a termination request.
/// Returns Application::EXIT_OK on normal termination; any exception is logged and rethrown.
int Server::main(const std::vector<std::string> & /*args*/)
try
{
// ...
// 1. Register the core components (functions, aggregate functions, table functions, storages,
//    dictionaries, disks, formats, remote file metadata, scheduler nodes, resource managers).
registerFunctions();
registerAggregateFunctions();
registerTableFunctions();
registerStorages();
registerDictionaries();
registerDisks(/* global_skip_access_check= */ false);
registerFormats();
registerRemoteFileMetadatas();
registerSchedulerNodes();
registerResourceManagers();
// Publish the version revision and version integer as metrics.
CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision());
CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger());
// 2. Create the global context.
/** Context contains all that query execution is dependent:
* settings, available functions, data types, aggregate functions, databases, ...
*/
auto shared_context = Context::createShared();
global_context = Context::createGlobal(shared_context.get());
global_context->makeGlobalContext();
global_context->setApplicationType(Context::ApplicationType::SERVER);
// ...
// 3. Initialize the global thread pool.
// Initialize global thread pool. Do it before we fetch configs from zookeeper
// nodes (`from_zk`), because ZooKeeper interface uses the pool. We will
// ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well.
GlobalThreadPool::initialize(
server_settings.max_thread_pool_size,
server_settings.max_thread_pool_free_size,
server_settings.thread_pool_queue_size);
// ...
// 4. Set up ZooKeeper access and, when the loaded config has ZooKeeper includes,
//    reload it so that the "from_zk" entries are resolved.
zkutil::validateZooKeeperConfig(config());
bool has_zookeeper = zkutil::hasZooKeeperConfig(config());
zkutil::ZooKeeperNodeCache main_config_zk_node_cache([&] { return global_context->getZooKeeper(); });
zkutil::EventPtr main_config_zk_changed_event = std::make_shared<Poco::Event>();
if (loaded_config.has_zk_includes)
{
// Keep a handle to the old configuration so it can be removed after the reloaded one is ready.
auto old_configuration = loaded_config.configuration;
ConfigProcessor config_processor(config_path);
loaded_config = config_processor.loadConfigWithZooKeeperIncludes(
main_config_zk_node_cache, main_config_zk_changed_event, /* fallback_to_preprocessed = */ true);
config_processor.savePreprocessedConfig(loaded_config, config().getString("path", DBMS_DEFAULT_PATH));
// Swap the old configuration layer for a copy of the reloaded one.
config().removeConfiguration(old_configuration.get());
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
}
Settings::checkNoSettingNamesAtTopLevel(config(), config_path);
/// We need to reload server settings because config could be updated via zookeeper.
server_settings.loadSettingsFromConfig(config());
// ...
// 5. Set up the local data path.
global_context->setRemoteHostFilter(config());
global_context->setHTTPHeaderFilter(config());
std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH));
fs::path path = path_str;
std::string default_database = server_settings.default_database.toString();
/// Check that the process user id matches the owner of the data.
assertProcessUserMatchesDataOwner(path_str, [&](const std::string & message){ global_context->addWarningMessage(message); });
global_context->setPath(path_str);
// "status" and "uuid" are kept directly under the data path.
StatusFile status{path / "status", StatusFile::write_full_info};
ServerUUID::load(path / "uuid", log);
// 5.1 Raise the limits on open file descriptors and on threads as far as allowed.
/// Try to increase limit on number of open files.
{
rlimit rlim;
if (getrlimit(RLIMIT_NOFILE, &rlim))
throw Poco::Exception("Cannot getrlimit");
// Soft limit already equals the hard limit: nothing to raise, just log it.
if (rlim.rlim_cur == rlim.rlim_max)
{
LOG_DEBUG(log, "rlimit on number of file descriptors is {}", rlim.rlim_cur);
}
else
{
rlim_t old = rlim.rlim_cur;
// Use "max_open_files" from the config, defaulting to the hard limit.
rlim.rlim_cur = config().getUInt("max_open_files", static_cast<unsigned>(rlim.rlim_max));
int rc = setrlimit(RLIMIT_NOFILE, &rlim);
// A failure is only logged as a warning; startup continues.
if (rc != 0)
LOG_WARNING(log, "Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. error: {}", rlim.rlim_cur, errnoToString());
else
LOG_DEBUG(log, "Set max number of file descriptors to {} (was {}).", rlim.rlim_cur, old);
}
}
/// Try to increase limit on number of threads.
{
rlimit rlim;
if (getrlimit(RLIMIT_NPROC, &rlim))
throw Poco::Exception("Cannot getrlimit");
if (rlim.rlim_cur == rlim.rlim_max)
{
LOG_DEBUG(log, "rlimit on number of threads is {}", rlim.rlim_cur);
}
else
{
rlim_t old = rlim.rlim_cur;
rlim.rlim_cur = rlim.rlim_max;
int rc = setrlimit(RLIMIT_NPROC, &rlim);
if (rc != 0)
{
LOG_WARNING(log, "Cannot set max number of threads to {}. error: {}", rlim.rlim_cur, errnoToString());
// Restore the previous value so the threshold check below sees the limit actually in effect.
rlim.rlim_cur = old;
}
else
{
LOG_DEBUG(log, "Set max number of threads to {} (was {}).", rlim.rlim_cur, old);
}
}
// Record a warning in the global context when the effective thread limit is below 30000.
if (rlim.rlim_cur < 30000)
{
global_context->addWarningMessage("Maximum number of threads is lower than 30000. There could be problems with handling a lot of simultaneous queries.");
}
}
// ...
// 6. Configure caches (only the uncompressed cache is shown in this excerpt).
// Upper bound for a cache: physical memory multiplied by cache_size_to_ram_max_ratio.
const size_t max_cache_size = static_cast<size_t>(physical_server_memory * server_settings.cache_size_to_ram_max_ratio);
String uncompressed_cache_policy = server_settings.uncompressed_cache_policy;
size_t uncompressed_cache_size = server_settings.uncompressed_cache_size;
double uncompressed_cache_size_ratio = server_settings.uncompressed_cache_size_ratio;
// Clamp the configured size to the upper bound.
if (uncompressed_cache_size > max_cache_size)
{
uncompressed_cache_size = max_cache_size;
LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size, uncompressed_cache_size_ratio);
// ...
// 7. Create the config reloader.
//    NOTE(review): the article says it scans for config changes every 2 seconds; the interval is not visible here.
auto main_config_reloader = std::make_unique<ConfigReloader>(
config_path,
extra_paths,
config().getString("path", ""),
std::move(main_config_zk_node_cache),
main_config_zk_changed_event,
[&](ConfigurationPtr config, bool initial_loading)
{
// ...
},
/* already_loaded = */ false); /// Reload it right now (initial loading)
// ...
// 8. Create the interserver listeners and start the servers that must run before tables are loaded.
{
std::lock_guard lock(servers_lock);
/// We should start interserver communications before (and more important shutdown after) tables.
/// Because server can wait for a long-running queries (for example in tcp_handler) after interserver handler was already shut down.
/// In this case we will have replicated tables which are unable to send any parts to other replicas, but still can
/// communicate with zookeeper, execute merges, etc.
createInterserverServers(
config(),
interserver_listen_hosts,
listen_try,
server_pool,
async_metrics,
servers_to_start_before_tables,
/* start_servers= */ false);
for (auto & server : servers_to_start_before_tables)
{
server.start();
LOG_INFO(log, "Listening for {}", server.getDescription());
}
}
// 9. Set up access control from the main config.
auto & access_control = global_context->getAccessControl();
try
{
access_control.setUpFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); });
}
catch (...)
{
// Log the failure, then let it propagate.
tryLogCurrentException(log, "Caught exception while setting up access control.");
throw;
}
// 10. Install callbacks that are invoked on demand: config reload, stop servers, start servers.
/// Reload config in SYSTEM RELOAD CONFIG query.
global_context->setConfigReloadCallback([&]()
{
main_config_reloader->reload();
access_control.reload(AccessControl::ReloadMode::USERS_CONFIG_ONLY);
});
global_context->setStopServersCallback([&](const ServerType & server_type)
{
std::lock_guard lock(servers_lock);
stopServers(servers, server_type);
});
global_context->setStartServersCallback([&](const ServerType & server_type)
{
std::lock_guard lock(servers_lock);
createServers(
config(),
listen_hosts,
listen_try,
server_pool,
async_metrics,
servers,
/* start_servers= */ true,
server_type);
});
// ...
// 11. Load metadata.
try
{
auto & database_catalog = DatabaseCatalog::instance();
/// We load temporary database first, because projections need it.
database_catalog.initializeAndLoadTemporaryDatabase();
loadMetadataSystem(global_context);
maybeConvertSystemDatabase(global_context);
/// This has to be done before the initialization of system logs,
/// otherwise there is a race condition between the system database initialization
/// and creation of new tables in the database.
startupSystemTables();
/// After attaching system databases we can initialize system log.
global_context->initializeSystemLogs();
global_context->setSystemZooKeeperLogAfterInitializationIfNeeded();
/// Build loggers before tables startup to make log messages from tables
/// attach available in system.text_log
buildLoggers(config(), logger());
/// After the system database is created, attach virtual system tables (in addition to query_log and part_log)
attachSystemTablesServer(global_context, *database_catalog.getSystemDatabase(), has_zookeeper);
attachInformationSchema(global_context, *database_catalog.getDatabase(DatabaseCatalog::INFORMATION_SCHEMA));
attachInformationSchema(global_context, *database_catalog.getDatabase(DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
/// Firstly remove partially dropped databases, to avoid race with MaterializedMySQLSyncThread,
/// that may execute DROP before loadMarkedAsDroppedTables() in background,
/// and so loadMarkedAsDroppedTables() will find it and try to add, and UUID will overlap.
database_catalog.loadMarkedAsDroppedTables();
database_catalog.createBackgroundTasks();
/// Then, load remaining databases
loadMetadata(global_context, default_database);
convertDatabasesEnginesIfNeed(global_context);
database_catalog.startupBackgroundCleanup();
/// After loading validate that default database exists
database_catalog.assertDatabaseExists(default_database);
/// Load user-defined SQL functions.
global_context->getUserDefinedSQLObjectsLoader().loadObjects();
}
catch (...)
{
// Log the failure, then let it propagate.
tryLogCurrentException(log, "Caught exception while loading metadata");
throw;
}
LOG_DEBUG(log, "Loaded metadata.");
{
// ...
// 12. Create the DDLWorker when ZooKeeper is configured and a "distributed_ddl" section exists.
//     NOTE(review): per the article it schedules and executes distributed DDL (ON CLUSTER) queries.
if (has_zookeeper && config().has("distributed_ddl"))
{
/// DDL worker should be started after all tables were loaded
String ddl_zookeeper_path = config().getString("distributed_ddl.path", "/clickhouse/task_queue/ddl/");
int pool_size = config().getInt("distributed_ddl.pool_size", 1);
// Reject a pool size below 1.
if (pool_size < 1)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "distributed_ddl.pool_size should be greater then 0");
global_context->setDDLWorker(std::make_unique<DDLWorker>(pool_size, ddl_zookeeper_path, global_context, &config(),
"distributed_ddl", "DDLWorker",
&CurrentMetrics::MaxDDLEntryID, &CurrentMetrics::MaxPushedDDLEntryID));
}
// 13. Start the remaining servers and mark the server as completely started.
{
std::lock_guard lock(servers_lock);
for (auto & server : servers)
{
server.start();
LOG_INFO(log, "Listening for {}", server.getDescription());
}
global_context->setServerCompletelyStarted();
LOG_INFO(log, "Ready for connections.");
}
// Stop the startup stopwatch and record the elapsed startup time as a profile event.
startup_watch.stop();
ProfileEvents::increment(ProfileEvents::ServerStartupMilliseconds, startup_watch.elapsedMilliseconds());
try
{
global_context->startClusterDiscovery();
}
catch (...)
{
// A cluster discovery failure is logged but not rethrown; the server keeps running.
tryLogCurrentException(log, "Caught exception while starting cluster discovery");
}
// Presumably blocks until a termination request arrives (judging by the name; body not shown here).
waitForTerminationRequest();
}
return Application::EXIT_OK;
}
catch (...)
{
/// Poco does not provide stacktrace.
tryLogCurrentException("Application");
throw;
}