前提:有多个clickhouse节点并在metrika.xml配置分片和副本。
-- 查询cluster集群名称 比如nx_clickhouse_4shards_1replicas
select * from system.clusters;
-- 只在当前节点创建本地数据库
create database test;
drop database test;
-- nx_clickhouse_4shards_1replicas 是集群名称
drop database test on cluster nx_clickhouse_4shards_1replicas ;
-- on cluster 会在指定集群中的所有节点下创建同样的数据库,执行后会反馈各节点的执行结果
create database test on cluster nx_clickhouse_4shards_1replicas ;
-- 建本地表(on cluster 会在集群的各个节点上建表, 但是insert数据只会在当前节点)
drop table test.cmtest on cluster nx_clickhouse_4shards_1replicas;
CREATE TABLE test.cmtest on cluster nx_clickhouse_4shards_1replicas
(
`id` String COMMENT 'id',
`nginxTime` DateTime COMMENT 'nginxTime'
)
ENGINE = MergeTree()
PARTITION BY toYYYYMMDD(nginxTime)
ORDER BY (nginxTime);
insert into test.cmtest values ('1',now());
insert into test.cmtest values ('10',now());
insert into test.cmtest values ('100',now()+3600*24);
-- insert数据只会在当前节点查到
select * from test.cmtest ;
-- 分布式引擎本身不存储数据, 但可以在多个服务器上进行分布式查询。
-- 分布式表(对分布式表的查询会查询到所有节点上的数据)
create TABLE test.cmtest_dist on cluster nx_clickhouse_4shards_1replicas as test.cmtest
ENGINE = Distributed("nx_clickhouse_4shards_1replicas", "test", "cmtest", rand());
-- 对本地insert只插入到本地
insert into test.cmtest values ('100108',now()+3600*24);
-- 对分布式表插入会根据规则路由到某个节点
insert into test.cmtest_dist values ('1004000',now()+3600*24);
-- 对分布式表的查询会查询到所有节点上的数据
select * from test.cmtest_dist;
-- 本地表只查询到当前节点上的数据
select * from test.cmtest;
-- 删除分布式表不会删除数据,重新创建分布式表后仍然可以查询到全量数据
drop table test.cmtest_dist on cluster nx_clickhouse_4shards_1replicas;
-- 删除本地表会删除数据
drop table test.cmtest on cluster nx_clickhouse_4shards_1replicas;
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
…
INDEX index_name1 expr1 TYPE type1(…) GRANULARITY value1,
INDEX index_name2 expr2 TYPE type2(…) GRANULARITY value2
) ENGINE = MergeTree()
ORDER BY expr
[PARTITION BY expr]
[PRIMARY KEY expr]
[SAMPLE BY expr]
[TTL expr [DELETE|TO DISK ‘xxx’|TO VOLUME ‘xxx’], …]
[SETTINGS name=value, …]
CREATE TABLE IF NOT EXISTS all_hits ON CLUSTER cluster (p Date, i Int32) ENGINE = Distributed(cluster, default, hits)
参考 https://clickhouse.com/docs/zh/engines/table-engines/special/distributed/#distributed
https://clickhouse.com/docs/zh/engines/table-engines/mergetree-family/mergetree
https://blog.csdn.net/Urbanears/article/details/122089428
https://blog.csdn.net/weixin_45912745/article/details/123405029
https://cloud.tencent.com/developer/article/1986902
https://www.cnblogs.com/yoyo1216/p/13731232.html
https://blog.csdn.net/weixin_46999292/article/details/113281753