ClickHouse Configuration File Explained

This article explains the ClickHouse configuration file in detail. For how to deploy ClickHouse itself, see:

ClickHouse使用介绍和部署指南-CSDN博客

<clickhouse>
<include_from>/etc/clickhouse-server/metrika.xml</include_from>

    <logger>
        <!-- Log level:
               - none (no logging)
               - fatal (fatal errors)
               - critical
               - error
               - warning
               - notice
               - information (normal informational messages)
               - debug
               - trace (low-level logging)
               - test (test logging; do not use in production)
        -->
        <level>trace</level>
        <!-- Log file paths -->
        <log>/var/log/clickhouse-server/clickhouse-server.log</log>
        <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
        <!-- Rotation policy
             See https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/FileChannel.h#L54-L85
          -->
        <size>1000M</size>
        <count>10</count>
        <!-- <console>1</console> --> <!-- Default behavior is autodetection (log to console if not daemon mode and is tty) -->
 
        <!-- Structured log output can be configured here:
             currently only JSON-formatted logs are supported, and the console then prints JSON strings.
             To enable it, uncomment the whole <formatting> block below.
             <names> lists the fields to be shown; unnecessary fields can be removed, but custom fields
             cannot be added.

             There is no need to enable this for now -->
        <!-- <formatting>
            <type>json</type>
            <names>
                <date_time>date_time</date_time>
                <thread_name>thread_name</thread_name>
                <thread_id>thread_id</thread_id>
                <level>level</level>
                <query_id>query_id</query_id>
                <logger_name>logger_name</logger_name>
                <message>message</message>
                <source_file>source_file</source_file>
                <source_line>source_line</source_line>
            </names>
        </formatting> -->
    </logger>
 
    
    <http_port>8123</http_port>
    <tcp_port>9000</tcp_port>
    <mysql_port>9004</mysql_port>
    <postgresql_port>9005</postgresql_port>
    <!-- <https_port>8443</https_port> -->
    <!-- <tcp_port_secure>9440</tcp_port_secure> -->
    <!-- <tcp_with_proxy_port>9011</tcp_with_proxy_port> -->
    <interserver_http_port>9009</interserver_http_port>
    <!-- <interserver_https_port>9010</interserver_https_port> -->
     
    <!-- Hostname or IP used for communication between replicas; if there are no replicas, the line below can be commented out -->
    <interserver_http_host>10.130.236.53</interserver_http_host>
    <!-- Username and password used for communication between replicas. This is not the database password;
         it is a server-to-server credential, similar to an FTP login -->
    <!--<interserver_http_credentials>
        <user>interserver</user>
        <password></password>
    </interserver_http_credentials>-->
     
    <!-- Addresses the server listens on (whitelist) -->
    <listen_host>::1</listen_host>
    <listen_host>127.0.0.1</listen_host>
    <listen_host>10.130.236.XXX</listen_host>
 
    <!-- Default values - try listen localhost on IPv4 and IPv6. -->
    <!--
    <listen_host>::1</listen_host>
    <listen_host>127.0.0.1</listen_host>
    -->
     
    <!-- Listen address for interserver (shard-to-shard) data exchange; defaults to the same value as listen_host.
         Not recommended to override, to avoid maintaining it in two places -->
    <!-- <interserver_listen_host>::</interserver_listen_host> -->
    <!-- If enabled, the server keeps running even if it fails to listen on some of the addresses -->
    <!-- <listen_try>0</listen_try> -->
 
    <!-- Maximum number of server connections -->
    <max_connections>4096</max_connections>
 
    <!-- Keep-alive timeout, in seconds; idle connections are closed after this time -->
    <keep_alive_timeout>3</keep_alive_timeout>
 
    <!-- gRPC settings -->
    <grpc_port>9100</grpc_port>
    <grpc>
        <enable_ssl>false</enable_ssl>
        <ssl_cert_file>/path/to/ssl_cert_file</ssl_cert_file>
        <ssl_key_file>/path/to/ssl_key_file</ssl_key_file>
        <ssl_require_client_auth>false</ssl_require_client_auth>
        <ssl_ca_cert_file>/path/to/ssl_ca_cert_file</ssl_ca_cert_file>
        <transport_compression_type>none</transport_compression_type>
        <transport_compression_level>0</transport_compression_level>
        <max_send_message_size>-1</max_send_message_size>
        <max_receive_message_size>-1</max_receive_message_size>
        <verbose_logs>false</verbose_logs>
    </grpc>
    <openSSL>
        <server>
            <verificationMode>none</verificationMode>
            <loadDefaultCAFile>true</loadDefaultCAFile>
            <cacheSessions>true</cacheSessions>
            <disableProtocols>sslv2,sslv3</disableProtocols>
            <preferServerCiphers>true</preferServerCiphers>
        </server>
        <client>
            <loadDefaultCAFile>true</loadDefaultCAFile>
            <cacheSessions>true</cacheSessions>
            <disableProtocols>sslv2,sslv3</disableProtocols>
            <preferServerCiphers>true</preferServerCiphers>
            <invalidCertificateHandler>
                <name>RejectCertificateHandler</name>
            </invalidCertificateHandler>
        </client>
    </openSSL>
     
    <!-- Soft limit on the number of query-processing threads on this server (threads on remote shard servers
         are not counted); 0 means no limit -->
    <concurrent_threads_soft_limit_num>0</concurrent_threads_soft_limit_num>
    <!-- The same soft limit expressed as a ratio of the number of CPU cores (cores on remote shard servers
         are not counted); 0 means no limit -->
    <concurrent_threads_soft_limit_ratio_to_cores>0</concurrent_threads_soft_limit_ratio_to_cores>
 
 
    <!-- Maximum amount of memory the ClickHouse server process may use. 0 means no explicit limit;
         the effective cap is then derived from max_server_memory_usage_to_ram_ratio -->
    <max_server_memory_usage>0</max_server_memory_usage>
    <!-- Fraction of physical RAM the server may use -->
    <max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio>
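    <!-- A rough worked example (assuming a hypothetical host with 128 GiB of RAM): with
         max_server_memory_usage = 0 and max_server_memory_usage_to_ram_ratio = 0.9, the effective
         server-wide memory cap would be roughly 128 GiB * 0.9 ≈ 115 GiB. -->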
 
 
 
    <!-- Maximum number of simultaneously executed queries (can be increased without restart; takes effect immediately) -->
    <max_concurrent_queries>200</max_concurrent_queries>
    <!-- Maximum number of threads in the global thread pool; if queries over Distributed tables are slow, try increasing this value -->
    <max_thread_pool_size>10000</max_thread_pool_size>
 
 
    <!-- Maximum number of threads for flushing Buffer tables in the background (can be increased without restart; takes effect immediately) -->
    <background_buffer_flush_schedule_pool_size>16</background_buffer_flush_schedule_pool_size>
    <!-- Number of threads performing background merges and mutations for MergeTree tables (can be increased without restart; takes effect immediately) -->
    <background_pool_size>16</background_pool_size>
    <background_merges_mutations_concurrency_ratio>2</background_merges_mutations_concurrency_ratio>
    <!-- Two merge scheduling policies: round_robin and shortest_task_first.
         round_robin: tasks are executed in turn, so no task starves.
         shortest_task_first: smaller merges are scheduled first (queued from smallest to largest), which is
         fastest for small parts but may starve large merges -->
    <background_merges_mutations_scheduling_policy>round_robin</background_merges_mutations_scheduling_policy>
    <!-- Number of background threads for moving data parts to another disk or volume (can be increased without restart; takes effect immediately) -->
    <background_move_pool_size>8</background_move_pool_size>
    <!-- Number of background threads for fetching data parts from replicas (can be increased without restart; takes effect immediately) -->
    <background_fetches_pool_size>8</background_fetches_pool_size>
    <!-- Maximum number of background threads for miscellaneous cleanup (garbage-collection-like) tasks (can be changed without restart; takes effect immediately) -->
    <background_common_pool_size>8</background_common_pool_size>
    <!-- Maximum number of threads that continuously run lightweight periodic operations (can be changed without restart; takes effect immediately) -->
    <background_schedule_pool_size>128</background_schedule_pool_size>
    <!-- Maximum number of background threads for message-broker streaming engines such as Kafka (can be changed without restart; takes effect immediately) -->
    <background_message_broker_schedule_pool_size>16</background_message_broker_schedule_pool_size>
    <!-- Maximum number of background threads for sending data of Distributed tables (can be changed without restart; takes effect immediately) -->
    <background_distributed_schedule_pool_size>16</background_distributed_schedule_pool_size>
 
 
    <total_memory_profiler_step>4194304</total_memory_profiler_step>
    <total_memory_tracker_sample_probability>0</total_memory_tracker_sample_probability>
     
    <uncompressed_cache_size>8589934592</uncompressed_cache_size>
    <mark_cache_size>5368709120</mark_cache_size>
 
 
    <!-- The benefits of mmap are not needed here for now; leave this at its default -->
    <mmap_cache_size>1000</mmap_cache_size>
 
    <compiled_expression_cache_size>134217728</compiled_expression_cache_size>
    <compiled_expression_cache_elements_size>10000</compiled_expression_cache_elements_size>
 
    <!-- Data path for tables that have no storage policy; must end with / -->
    <path>/data01/clickhousedata/</path>
 
 
    <storage_configuration>
        <disks>
            <default>
                <keep_free_space_bytes>107374182400</keep_free_space_bytes>
            </default>
            <data02>
                <path>/data02/clickhousedata/</path>
                <keep_free_space_bytes>107374182400</keep_free_space_bytes>
            </data02>
            <data03>
                <path>/data03/clickhousedata/</path>
                <keep_free_space_bytes>107374182400</keep_free_space_bytes>
            </data03>
            <data04>
                <path>/data04/clickhousedata/</path>
                <keep_free_space_bytes>107374182400</keep_free_space_bytes>
            </data04>
            <data05>
                <path>/data05/clickhousedata/</path>
                <keep_free_space_bytes>107374182400</keep_free_space_bytes>
            </data05>
            <data06>
                <path>/data06/clickhousedata/</path>
                <keep_free_space_bytes>107374182400</keep_free_space_bytes>
            </data06>
            <data07>
                <path>/data07/clickhousedata/</path>
                <keep_free_space_bytes>107374182400</keep_free_space_bytes>
            </data07>
            <data08>
                <path>/data08/clickhousedata/</path>
                <keep_free_space_bytes>107374182400</keep_free_space_bytes>
            </data08>
            <data09>
                <path>/data09/clickhousedata/</path>
                <keep_free_space_bytes>107374182400</keep_free_space_bytes>
            </data09>
            <data10>
                <path>/data10/clickhousedata/</path>
                <keep_free_space_bytes>107374182400</keep_free_space_bytes>
            </data10>
            <data11>
                <path>/data11/clickhousedata/</path>
                <keep_free_space_bytes>107374182400</keep_free_space_bytes>
            </data11>
            <data12>
                <path>/data12/clickhousedata/</path>
                <keep_free_space_bytes>107374182400</keep_free_space_bytes>
            </data12>
        </disks>
 
        <policies>
            <new_storage_only>  <!-- Policy name (must be unique) -->
                <volumes>
                    <hot_volume> <!-- Volume name (must be unique) -->
                        <disk>default</disk>
                        <!-- Maximum size of a data part that may be stored on this volume; larger parts go to the next volume -->
                        <max_data_part_size_bytes>10737418240</max_data_part_size_bytes>
                        <!-- The same limit expressed as a ratio of the volume size; parts exceeding it go to the next volume -->
                        <!-- <max_data_part_size_ratio></max_data_part_size_ratio> -->
                        <!-- If true, data whose TTL move expression has already expired is written directly to the
                             destination volume on insert, instead of being inserted here and moved later by
                             ClickHouse's background task -->
                        <perform_ttl_move_on_insert>false</perform_ttl_move_on_insert>
                        <!-- Whether to disable background merges of data parts on this volume -->
                        <prefer_not_to_merge>false</prefer_not_to_merge>
                        <!-- Disk balancing policy (round-robin); pointless when the volume has only one disk -->
                        <load_balancing>round_robin</load_balancing>
                    </hot_volume>
                    <cold_volume>
                        <disk>data02</disk>
                        <disk>data03</disk>
                        <disk>data04</disk>
                        <disk>data05</disk>
                        <disk>data06</disk>
                        <disk>data07</disk>
                        <disk>data08</disk>
                        <disk>data09</disk>
                        <disk>data10</disk>
                        <disk>data11</disk>
                        <disk>data12</disk>
                        <load_balancing>round_robin</load_balancing>
                    </cold_volume>
                </volumes>
                <!-- Move factor for this policy. In general, two things cause data to be moved to another volume:
                     1. TTL-based moves, checked and executed by ClickHouse background threads;
                     2. low free disk space: a move is triggered when the free space on the volume falls below
                        move_factor * disk_size -->
                <move_factor>0.3</move_factor>
            </new_storage_only>
        </policies>
    </storage_configuration>
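    <!-- Usage sketch (hypothetical table name): a MergeTree table that uses the policy defined above
         references it via the storage_policy setting, e.g.

         CREATE TABLE example_events
         (
             event_date Date,
             event_time DateTime,
             payload    String
         )
         ENGINE = MergeTree
         PARTITION BY toYYYYMM(event_date)
         ORDER BY (event_date, event_time)
         SETTINGS storage_policy = 'new_storage_only';

         Which disk each part currently lives on can be checked via the disk_name column of system.parts. -->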
  
 
    <!-- Path for temporary data used during query processing -->
    <tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
 
    <!-- Allow the plaintext_password and no_password auth types for ACL; set these to 0 to disable them -->
    <allow_plaintext_password>1</allow_plaintext_password>
    <allow_no_password>1</allow_no_password>
    <allow_implicit_no_password>1</allow_implicit_no_password>
 
    <!-- Password complexity requirements; if enabled, newly set passwords must satisfy the rules below -->
    <!-- <password_complexity>
        <rule>
            <pattern>.{12}</pattern>
            <message>be at least 12 characters long</message>
        </rule>
        <rule>
            <pattern>\p{N}</pattern>
            <message>contain at least 1 numeric character</message>
        </rule>
        <rule>
            <pattern>\p{Ll}</pattern>
            <message>contain at least 1 lowercase character</message>
        </rule>
        <rule>
            <pattern>\p{Lu}</pattern>
            <message>contain at least 1 uppercase character</message>
        </rule>
        <rule>
            <pattern>[^\p{L}\p{N}]</pattern>
            <message>contain at least 1 special character</message>
        </rule>
    </password_complexity> -->
 
    <!-- Directory for user files; files placed here can be read with the file() table function -->
    <user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
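    <!-- Usage sketch (hypothetical file name): a CSV file placed under user_files_path can be queried
         with the file() table function, e.g.
         SELECT * FROM file('example.csv', 'CSV', 'id UInt32, name String'); -->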
 
    <!-- LDAP servers; not used for now -->
    <ldap_servers>
    </ldap_servers>
 
    <!-- Sources of user configuration -->
    <user_directories>
        <users_xml>
            <!-- Default users configuration file -->
            <path>users.xml</path>
        </users_xml>
        <local_directory>
            <!-- Users created via SQL statements are stored under this directory -->
            <path>/var/lib/clickhouse/access/</path>
        </local_directory>
    </user_directories>
 
    <access_control_improvements>
        <!-- Whether users that have no row policies defined can still read rows with SELECT -->
        <users_without_row_policies_can_read_rows>false</users_without_row_policies_can_read_rows>

        <!-- Whether ON CLUSTER queries require the CLUSTER grant -->
        <on_cluster_queries_require_cluster_grant>false</on_cluster_queries_require_cluster_grant>

        <!-- Whether SELECT from system.* tables requires an explicit grant; false means no grant is needed -->
        <select_from_system_db_requires_grant>false</select_from_system_db_requires_grant>

        <!-- Whether SELECT from information_schema tables requires an explicit grant; false means no grant is needed -->
        <select_from_information_schema_requires_grant>false</select_from_information_schema_requires_grant>

        <settings_constraints_replace_previous>false</settings_constraints_replace_previous>
        <!-- Role cache lifetime in seconds; with 600 (10 minutes), role privileges are not re-fetched within that window -->
        <role_cache_expiration_time_seconds>600</role_cache_expiration_time_seconds>
    </access_control_improvements>
 
    <!-- Default settings profile -->
    <default_profile>default</default_profile>
 
    <!-- Prefixes for custom (user-defined) settings -->
    <custom_settings_prefixes></custom_settings_prefixes>
 
    <!-- System profile of settings. These settings are used by internal processes (Distributed DDL worker and so on). -->
    <!-- <system_profile>default</system_profile> -->
 
    <!-- Buffer profile of settings.
         These settings are used by the Buffer storage engine to flush data to the underlying table.
         Default: taken from the system_profile directive.
    -->
    <!-- <buffer_profile>default</buffer_profile> -->
 
    <!-- Default database -->
    <default_database>default</default_database>
     
    <!-- Server time zone; if not specified, the system time zone is used -->
    <timezone>Asia/Shanghai</timezone>
 
    <!-- umask for files and directories created by the ClickHouse server; ClickHouse defaults to 027, while the usual Linux default is 022 -->
    <!-- <umask>022</umask> -->
 
    <!-- After startup the server calls mlockall to lock its code in memory, which lowers first-query latency at the cost of a longer startup time -->
    <mlock_executable>true</mlock_executable>
 
    <!-- Experimental feature; recommended to keep disabled -->
    <remap_executable>false</remap_executable>
     
    <remote_servers incl="clickhouse_remote_servers"/>
     
 
 
    <!-- If an element has an 'incl' attribute, its value is taken from the corresponding substitution in another file.
         By default, the path to the file with substitutions is /etc/metrika.xml; it can be changed with the 'include_from' element.
         Values for substitutions are specified in /clickhouse/name_of_substitution elements in that file.
      -->
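    <!-- A minimal sketch of what /etc/clickhouse-server/metrika.xml (see include_from above) might contain
         for the clickhouse_remote_servers substitution; the cluster name and hosts below are placeholders:

         <clickhouse>
             <clickhouse_remote_servers>
                 <example_cluster>
                     <shard>
                         <internal_replication>true</internal_replication>
                         <replica>
                             <host>10.130.236.XXX</host>
                             <port>9000</port>
                         </replica>
                     </shard>
                 </example_cluster>
             </clickhouse_remote_servers>
         </clickhouse>
    -->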
 
    <!-- ZooKeeper is used to store metadata about replicas, when using Replicated tables.
         Optional. If you don't use replicated tables, you could omit that.

         See https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication/
      -->
 
    <zookeeper>
        <node>
            <host>10.130.237.XXX</host>
            <port>2182</port>
        </node>
        <node>
            <host>10.130.237.XXX</host>
            <port>2182</port>
        </node>
        <node>
            <host>10.130.237.XXX</host>
            <port>2182</port>
        </node>
        <identity>clickhouse:XXX</identity>
    </zookeeper>
 
 
    <!-- Macros, substituted into replicated/distributed table DDL so that the shard- and replica-specific parts can be omitted -->
    <macros>
        <shard>02</shard>
        <replica>10.130.236.XXX</replica>
    </macros>
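    <!-- Usage sketch (hypothetical database/table names): the macros above are expanded in replicated table DDL, e.g.

         CREATE TABLE example_db.example_local
         (
             event_date Date,
             value      UInt64
         )
         ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/example_local', '{replica}')
         PARTITION BY toYYYYMM(event_date)
         ORDER BY event_date;

         Here {shard} expands to 02 and {replica} to the IP configured above. -->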
 
 
    <!-- Reload interval, in seconds, for built-in dictionaries -->
    <builtin_dictionaries_reload_interval>3600</builtin_dictionaries_reload_interval>
 
 
    <!-- Maximum session timeout, in seconds -->
    <max_session_timeout>3600</max_session_timeout>
 
    <!-- Default session timeout, in seconds -->
    <default_session_timeout>60</default_session_timeout>

    <!-- Delay, in seconds, before the data of a dropped table is actually removed (Atomic database engine); default: 480 -->
    <database_atomic_delay_before_drop_table_sec>60</database_atomic_delay_before_drop_table_sec>
 
    <!-- Prometheus monitoring endpoint settings (to be refined) -->
    <!--
        endpoint - metrics path (relative to root, starting with "/")
        port - port for the metrics server; if not defined or 0, http_port is used
        metrics - send data from table system.metrics
        events - send data from table system.events
        asynchronous_metrics - send data from table system.asynchronous_metrics
        status_info - send data from different components of CH, e.g. Dictionaries status
    -->
    <!--
    <prometheus>
        <endpoint>/metrics</endpoint>
        <port>9363</port>
 
        <metrics>true</metrics>
        <events>true</events>
        <asynchronous_metrics>true</asynchronous_metrics>
        <status_info>true</status_info>
    </prometheus>
    -->
    <query_log>
        <!-- Query log table. If the table does not exist, or its structure differs from the expected one,
             the old table is renamed and a new one is created -->
        <database>system</database>
        <table>query_log</table>
        <!-- <partition_by>toYYYYMM(event_date)</partition_by> -->
		<!-- <ttl>event_date + INTERVAL 30 DAY DELETE</ttl> -->
		<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) TTL event_date + INTERVAL 30 day DELETE SETTINGS storage_policy = 'new_storage_only'</engine>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </query_log>
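    <!-- Usage sketch: recent finished queries can then be inspected with something like
         SELECT event_time, query_duration_ms, query
         FROM system.query_log
         WHERE type = 'QueryFinish'
         ORDER BY event_time DESC
         LIMIT 10; -->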

    <trace_log>
        <database>system</database>
        <table>trace_log</table>
		<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) TTL event_date + INTERVAL 30 day DELETE SETTINGS storage_policy = 'new_storage_only'</engine>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </trace_log>

    <query_thread_log>
        <database>system</database>
        <table>query_thread_log</table>
		<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) TTL event_date + INTERVAL 30 day DELETE SETTINGS storage_policy = 'new_storage_only'</engine>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </query_thread_log>

    <!-- Query views log. Has information about all dependent views associated with a query.
         Used only for queries with setting log_query_views = 1. -->
    <query_views_log>
        <database>system</database>
        <table>query_views_log</table>
		<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) TTL event_date + INTERVAL 30 day DELETE SETTINGS storage_policy = 'new_storage_only'</engine>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </query_views_log>

    <!-- Part log. Contains information about all actions with data parts in MergeTree tables
         (creation, deletion, merges, downloads). -->
    <part_log>
        <database>system</database>
        <table>part_log</table>
		<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) TTL event_date + INTERVAL 30 day DELETE SETTINGS storage_policy = 'new_storage_only'</engine>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </part_log>

    <!-- Uncomment to write the text log into a table.
         The text log contains all information from the usual server log, but stores it in a structured and efficient way.
         The level of messages that go to the table can be limited via <level>; if not specified, all messages go to the table.
    <text_log>
        <database>system</database>
        <table>text_log</table>
		<engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
        <level></level>
    </text_log>
    -->

    <!-- Metric log contains rows with current values of ProfileEvents, CurrentMetrics collected with "collect_interval_milliseconds" interval. -->
    <metric_log>
        <database>system</database>
        <table>metric_log</table>
		<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) TTL event_date + INTERVAL 30 day DELETE SETTINGS storage_policy = 'new_storage_only'</engine>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
        <collect_interval_milliseconds>1000</collect_interval_milliseconds>
    </metric_log>

    <!--
        Asynchronous metric log contains values of metrics from
        system.asynchronous_metrics.
    -->
    <asynchronous_metric_log>
        <database>system</database>
        <table>asynchronous_metric_log</table>
		<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) TTL event_date + INTERVAL 30 day DELETE SETTINGS storage_policy = 'new_storage_only'</engine>
        <flush_interval_milliseconds>7000</flush_interval_milliseconds>
    </asynchronous_metric_log>

    <!--
        OpenTelemetry log contains OpenTelemetry trace spans.
    -->
    <opentelemetry_span_log>
        <engine>
            engine MergeTree
            partition by toYYYYMM(finish_date)
            order by (finish_date, finish_time_us, trace_id) 
			TTL finish_date + INTERVAL 30 day DELETE  SETTINGS storage_policy = 'new_storage_only'
        </engine>
        <database>system</database>
        <table>opentelemetry_span_log</table>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </opentelemetry_span_log>



    <!-- Crash log. Stores stack traces for fatal errors.
         This table is normally empty. -->
    <crash_log>
        <database>system</database>
        <table>crash_log</table>
		<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) TTL event_date + INTERVAL 30 day DELETE SETTINGS storage_policy = 'new_storage_only'</engine>
        <flush_interval_milliseconds>1000</flush_interval_milliseconds>
    </crash_log>

    <!-- Session log. Stores user log in (successful or not) and log out events.

        Note: session log has known security issues and should not be used in production.
    -->
    <session_log>
        <database>system</database>
        <table>session_log</table>
		<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) TTL event_date + INTERVAL 30 day DELETE SETTINGS storage_policy = 'new_storage_only'</engine>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </session_log>

    <!-- Profiling on Processors level. -->
    <processors_profile_log>
        <database>system</database>
        <table>processors_profile_log</table>
	<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) TTL event_date + INTERVAL 30 day DELETE SETTINGS storage_policy = 'new_storage_only'</engine>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </processors_profile_log>

    <!-- Log of asynchronous inserts. Allows checking the status of insert queries
         submitted in fire-and-forget mode.
    -->
    <asynchronous_insert_log>
        <database>system</database>
        <table>asynchronous_insert_log</table>

        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
        <partition_by>event_date</partition_by>
        <ttl>event_date + INTERVAL 3 DAY DELETE</ttl>
    </asynchronous_insert_log>

 


    
    <!-- Custom dictionaries -->
    <dictionaries_config>*_dictionary.xml</dictionaries_config>
 
    <!-- Custom (executable) user-defined functions -->
    <user_defined_executable_functions_config>*_function.xml</user_defined_executable_functions_config>
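    <!-- A minimal sketch of a matching *_function.xml (hypothetical function name and script path),
         defining an executable user-defined function:

         <functions>
             <function>
                 <type>executable</type>
                 <name>example_upper</name>
                 <command>example_upper.py</command>
                 <format>TabSeparated</format>
                 <argument>
                     <type>String</type>
                 </argument>
                 <return_type>String</return_type>
             </function>
         </functions>
    -->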
 
    <!-- Path in ZooKeeper to store user-defined SQL functions created by CREATE FUNCTION.
         If not specified, they are stored locally. -->
    <!-- <user_defined_zookeeper_path>/clickhouse/user_defined</user_defined_zookeeper_path> -->
 
    <!-- Compression settings. lz4 is the default codec; zstd can be specified instead for parts matching the conditions below -->
    <!--
    <compression>
        <case>
            <!- - Conditions. All must be satisfied. Some conditions may be omitted. - ->
            <min_part_size>10000000000</min_part_size>        <!- - Min part size in bytes. - ->
            <min_part_size_ratio>0.01</min_part_size_ratio>   <!- - Min size of part relative to whole table size. - ->
            <method>zstd</method>
        </case>
    </compression>
    -->
    <!-- Encryption codecs; not used for now -->
    <encryption_codecs>
        <!-- aes_128_gcm_siv -->
            <!-- Example of getting hex key from env -->
            <!-- the code should use this key and throw an exception if its length is not 16 bytes -->
            <!--key_hex from_env="..."></key_hex -->
 
            <!-- Example of multiple hex keys. They can be imported from env or be written down in config-->
            <!-- the code should use these keys and throw an exception if their length is not 16 bytes -->
            <!-- key_hex id="0">...</key_hex -->
            <!-- key_hex id="1" from_env=".."></key_hex -->
            <!-- key_hex id="2">...</key_hex -->
            <!-- current_key_id>2</current_key_id -->
 
            <!-- Example of getting hex key from config -->
            <!-- the code should use this key and throw an exception if its length is not 16 bytes -->
            <!-- key>...</key -->
 
            <!-- example of adding nonce -->
            <!-- nonce>...</nonce -->
 
        <!-- /aes_128_gcm_siv -->
    </encryption_codecs>
 
    <!-- Distributed DDL: ZooKeeper path of the DDL task queue and related settings -->
    <distributed_ddl>
        <!-- Path in ZooKeeper to queue with DDL queries -->
        <path>/clickhouse/task_queue/ddl</path>
        <!-- Settings from this profile will be used to execute DDL queries -->
        <profile>default</profile>
        <!-- Number of threads executing ON CLUSTER queries; normally only DDL goes through this queue, so concurrency is low -->
        <pool_size>1</pool_size>
        <!-- Task lifetime in seconds; default is one week -->
        <task_max_lifetime>604800</task_max_lifetime>
        <!-- How often completed tasks are cleaned up, in seconds -->
        <cleanup_delay_period>60</cleanup_delay_period>
        <!-- Maximum length of the task queue -->
        <max_tasks_in_queue>1000</max_tasks_in_queue>
    </distributed_ddl>
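    <!-- Usage sketch (hypothetical cluster/database names): DDL submitted with ON CLUSTER is queued under
         the ZooKeeper path above and executed on every node of the cluster, e.g.
         CREATE DATABASE example_db ON CLUSTER example_cluster;
         In recent versions, task status can be checked in system.distributed_ddl_queue. -->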
 
    <!-- Default settings for MergeTree tables -->
    <merge_tree>
        <!-- Maximum number of broken parts allowed in a single table; default: 100 -->
        <max_suspicious_broken_parts>100</max_suspicious_broken_parts>
        <max_suspicious_broken_parts_bytes>10G</max_suspicious_broken_parts_bytes>
        <!-- If a single table has more active parts than this, inserts fail with an error.
             In short, this typically happens when:
             1. queries (and merges) have too many parts to deal with, or
             2. an insert spans too wide a time range and so creates too many parts.
             Default: 300 -->
        <parts_to_throw_insert>600</parts_to_throw_insert>
        <!-- If a single table has more active parts than this, inserts are delayed; default: 150 -->
        <parts_to_delay_insert>300</parts_to_delay_insert>
        <!-- Maximum insert delay, in seconds; default: 1 -->
        <max_delay_to_insert>1</max_delay_to_insert>
        <!-- If the total number of active parts across all partitions of a table exceeds this, an error is thrown; default: 100000 -->
        <max_parts_in_total>100000</max_parts_in_total>
        <!-- Limit on the number of parts removed at once; 0 means unlimited; default: 0 -->
        <simultaneous_parts_removal_limit>0</simultaneous_parts_removal_limit>
         
        <inactive_parts_to_throw_insert>0</inactive_parts_to_throw_insert>
        <inactive_parts_to_delay_insert>0</inactive_parts_to_delay_insert>
    </merge_tree>
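    <!-- Usage sketch: the current number of active parts per table, which the limits above apply to,
         can be checked with
         SELECT database, table, count() AS active_parts
         FROM system.parts
         WHERE active
         GROUP BY database, table
         ORDER BY active_parts DESC; -->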
     
 
    <!-- Two safety limits on dropping tables/partitions; not recommended to change. Default is 50 GB: anything
         larger than that cannot be dropped. 0 means drops are allowed regardless of size -->
    <!-- <max_table_size_to_drop>0</max_table_size_to_drop> -->
    <!-- <max_partition_size_to_drop>0</max_partition_size_to_drop> -->
 
    <!-- Example of parameters for GraphiteMergeTree table engine -->
    <graphite_rollup_example>
        <pattern>
            <regexp>click_cost</regexp>
            <function>any</function>
            <retention>
                <age>0</age>
                <precision>3600</precision>
            </retention>
            <retention>
                <age>86400</age>
                <precision>60</precision>
            </retention>
        </pattern>
        <default>
            <function>max</function>
            <retention>
                <age>0</age>
                <precision>60</precision>
            </retention>
            <retention>
                <age>3600</age>
                <precision>300</precision>
            </retention>
            <retention>
                <age>86400</age>
                <precision>3600</precision>
            </retention>
        </default>
    </graphite_rollup_example>
 
    <!-- Directory for format schema files (e.g. Protobuf / Cap'n Proto schemas) -->
    <format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>
 
    <!-- Query masking rules: regex-based masking of sensitive fragments in queries before they are written to logs.
         Masking is applied only on the local node, so for Distributed tables every node must be configured.
         Leave disabled for now
    <query_masking_rules>
        <rule>
            <name>hide encrypt/decrypt arguments</name>
            <regexp>((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:'(?:\\'|.)+'|.*?)\s*\)</regexp>
            <replace>\1(???)</replace>
        </rule>
    </query_masking_rules> -->
 
    <!-- Custom HTTP handlers; use these to expose custom HTTP endpoints on the ClickHouse server.
         Rules are matched from top to bottom; the first matching rule is used and matching does not fall through.
            url - to match request URL, you can use 'regex:' prefix to use regex match(optional)
            methods - to match request method, you can use commas to separate multiple method matches(optional)
            headers - to match request headers, match each child element(child element name is header name), you can use 'regex:' prefix to use regex match(optional)
        handler is request handler
            type - supported types: static, dynamic_query_handler, predefined_query_handler
            query - use with predefined_query_handler type, executes query when the handler is called
            query_param_name - use with dynamic_query_handler type, extracts and executes the value corresponding to the <query_param_name> value in HTTP request params
            status - use with static type, response status code
            content_type - use with static type, response content-type
            response_content - use with static type, Response content sent to client, when using the prefix 'file://' or 'config://', find the content from the file or configuration send to client.
    <http_handlers>
        <rule>
            <url>/</url>
            <methods>POST,GET</methods>
            <headers><pragma>no-cache</pragma></headers>
            <handler>
                <type>dynamic_query_handler</type>
                <query_param_name>query</query_param_name>
            </handler>
        </rule>
 
        <rule>
            <url>/predefined_query</url>
            <methods>POST,GET</methods>
            <handler>
                <type>predefined_query_handler</type>
                <query>SELECT * FROM system.settings</query>
            </handler>
        </rule>
 
        <rule>
            <handler>
                <type>static</type>
                <status>200</status>
                <content_type>text/plain; charset=UTF-8</content_type>
                <response_content>config://http_server_default_response</response_content>
            </handler>
        </rule>
    </http_handlers>
    -->
    <!-- Send crash reports to the ClickHouse core development team via Sentry; keep disabled -->
    <send_crash_reports>
        <enabled>false</enabled>
        <anonymize>false</anonymize>
        <endpoint>https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277</endpoint>
    </send_crash_reports>
 
    <!-- Disable the internal DNS cache. Recommended when running ClickHouse on infrastructure with frequently changing addresses (e.g. Kubernetes). Default: 0 -->
    <disable_internal_dns_cache>0</disable_internal_dns_cache>
     
     
    <!-- Query cache settings; the defaults are fine -->
    <!-- <query_cache> -->
    <!--     <max_size>1073741824</max_size> -->
    <!--     <max_entries>1024</max_entries> -->
    <!--     <max_entry_size>1048576</max_entry_size> -->
    <!--     <max_entry_rows>30000000</max_entry_rows> -->
    <!-- </query_cache> -->
 
    <!-- Metadata cache settings; the defaults are fine -->
    <!--merge_tree_metadata_cache>
        <lru_cache_size>268435456</lru_cache_size>
        <continue_if_corrupted>true</continue_if_corrupted>
    </merge_tree_metadata_cache-->
</clickhouse>
