clickhouse引擎--MergeTree引擎

MergeTree引擎

一.MergeTree引擎

MergeTree系列的表引擎是clickhouse数据存储功能的核心,它们提供了用于弹性和高性能数据检索的大多数功能:列存储,自定义分区,稀疏的主索引,辅助数据跳过索引等

主要特点:

  1. 存储按主键排序的数据(必须有order by(主键,…))
  2. 如果指定了分区键,则可以使用分区
  3. 数据复制支持
  4. 数据采样支持
create table tb_merge_tree(
uid UInt8,
name String,
birthday Date,
city String,
gender String
)engine=MergeTree()
primary key uid
order by (uid,birthday);

insert into tb_merge_tree values
(1,'zs',toDate(now()),'HZ','M'),
(2,'ls','2021-09-27','HZ','M'),
(3,'ww','2021-08-18','SH','F'),
(4,'zl','1999-09-27','JS','F');

select * from tb_merge_tree;
┌─uid─┬─name─┬───birthday─┬─city─┬─gender─┐
│   1 │ zs   │ 2021-08-30 │ HZ   │ M      │
│   2 │ ls   │ 2021-09-27 │ HZ   │ M      │
│   3 │ ww   │ 2021-08-18 │ SH   │ F      │
│   4 │ zl   │ 1999-09-27 │ JS   │ F      │
└─────┴──────┴────────────┴──────┴────────┘


**看一下底层存储**
[root@spark-140 test1]# cd tb_merge_tree/
[root@spark-140 tb_merge_tree]# ll
total 12
drwxr-x--- 2 root root 4096 Aug 30 17:18 all_1_1_0    //存着表的所有数据
drwxr-x--- 2 root root 4096 Aug 30 17:18 detached
-rw-r----- 1 root root    1 Aug 30 17:18 format_version.txt
[root@spark-140 tb_merge_tree]# cd all_1_1_0/
[root@spark-140 all_1_1_0]# ll
total 28
-rw-r----- 1 root root 187 Aug 30 17:18 checksums.txt   //数据安全校验
-rw-r----- 1 root root 109 Aug 30 17:18 columns.txt   //列信息文件,存放列字段信息
-rw-r----- 1 root root   1 Aug 30 17:18 count.txt   //记录当前数据的条数
-rw-r----- 1 root root 174 Aug 30 17:18 data.bin    //数据文件
-rw-r----- 1 root root 176 Aug 30 17:18 data.mrk3   //列字段标记文件
-rw-r----- 1 root root  10 Aug 30 17:18 default_compression_codec.txt
-rw-r----- 1 root root   2 Aug 30 17:18 primary.idx  //一级索引文件,用于存放稀疏索引
[root@spark-140 all_1_1_0]# cat count.txt4

insert into tb_merge_tree values
(1,'zs',toDate(now()),'HZ','M'),
(2,'ls','2021-09-27','HZ','M'),
(3,'ww','2021-08-18','SH','F'),
(4,'zl','1999-09-27','JS','F');                    //再次执行一次插入操作

select * from tb_merge_tree;
┌─uid─┬─name─┬───birthday─┬─city─┬─gender─┐
│   1 │ zs   │ 2021-08-30 │ HZ   │ M      │
│   2 │ ls   │ 2021-09-27 │ HZ   │ M      │
│   3 │ ww   │ 2021-08-18 │ SH   │ F      │
│   4 │ zl   │ 1999-09-27 │ JS   │ F      │
└─────┴──────┴────────────┴──────┴────────┘
┌─uid─┬─name─┬───birthday─┬─city─┬─gender─┐
│   1 │ zs   │ 2021-08-30 │ HZ   │ M      │
│   2 │ ls   │ 2021-09-27 │ HZ   │ M      │
│   3 │ ww   │ 2021-08-18 │ SH   │ F      │
│   4 │ zl   │ 1999-09-27 │ JS   │ F      │
└─────┴──────┴────────────┴──────┴────────┘
//主键没有唯一要求,是可以重复的


[root@spark-140 tb_merge_tree]# ll
total 16
drwxr-x--- 2 root root 4096 Aug 30 17:18 all_1_1_0
drwxr-x--- 2 root root 4096 Aug 30 17:26 all_2_2_0  //第二次插入操作的文件
drwxr-x--- 2 root root 4096 Aug 30 17:18 detached
-rw-r----- 1 root root    1 Aug 30 17:18 format_version.txt
[root@spark-140 tb_merge_tree]# cd all_2_2_0/
[root@spark-140 all_2_2_0]# ll
total 28
-rw-r----- 1 root root 187 Aug 30 17:26 checksums.txt
-rw-r----- 1 root root 109 Aug 30 17:26 columns.txt
-rw-r----- 1 root root   1 Aug 30 17:26 count.txt
-rw-r----- 1 root root 174 Aug 30 17:26 data.bin
-rw-r----- 1 root root 176 Aug 30 17:26 data.mrk3
-rw-r----- 1 root root  10 Aug 30 17:26 default_compression_codec.txt
-rw-r----- 1 root root   2 Aug 30 17:26 primary.idx
[root@spark-140 all_2_2_0]# cat count.txt
4


**合并这两个数据块**
optimize table tb_merge_tree [final];   //final关键字是强制合并的意思,用于多个表合并时

select * from tb_merge_tree;
┌─uid─┬─name─┬───birthday─┬─city─┬─gender─┐
│   1 │ zs   │ 2021-08-30 │ HZ   │ M      │
│   1 │ zs   │ 2021-08-30 │ HZ   │ M      │
│   2 │ ls   │ 2021-09-27 │ HZ   │ M      │
│   2 │ ls   │ 2021-09-27 │ HZ   │ M      │
│   3 │ ww   │ 2021-08-18 │ SH   │ F      │
│   3 │ ww   │ 2021-08-18 │ SH   │ F      │
│   4 │ zl   │ 1999-09-27 │ JS   │ F      │
│   4 │ zl   │ 1999-09-27 │ JS   │ F      │
└─────┴──────┴────────────┴──────┴────────┘
这时再看一下底层存储
[root@spark-140 tb_merge_tree]# ll
total 20
drwxr-x--- 2 root root 4096 Aug 30 17:18 all_1_1_0
drwxr-x--- 2 root root 4096 Aug 30 17:31 all_1_2_1  //这是合并操作的文件
drwxr-x--- 2 root root 4096 Aug 30 17:26 all_2_2_0
drwxr-x--- 2 root root 4096 Aug 30 17:18 detached
-rw-r----- 1 root root    1 Aug 30 17:18 format_version.txt
[root@spark-140 tb_merge_tree]# cd all_1_2_1/
//all_1_2_1数字分别代表什么   1代表最低版本,2代表最高版本,1代表合并操作一次
[root@spark-140 all_1_2_1]# ll
total 28-rw-r----- 1 root root 187 Aug 30 17:31 checksums.txt
-rw-r----- 1 root root 109 Aug 30 17:31 columns.txt
-rw-r----- 1 root root   1 Aug 30 17:31 count.txt
-rw-r----- 1 root root 205 Aug 30 17:31 data.bin
-rw-r----- 1 root root 176 Aug 30 17:31 data.mrk3  
-rw-r----- 1 root root  10 Aug 30 17:31 default_compression_codec.txt
-rw-r----- 1 root root   2 Aug 30 17:31 primary.idx
[root@spark-140 all_1_2_1]# cat count.txt
8


create table tb_merge_tree_partition(
uid UInt8,
name String,
birthday DateTime
)engine=MergeTree()
partition by toDate(birthday)
order by uid;

insert into tb_merge_tree_partition values
(1,'zs','2020-09-27 10:00:00'),
(2,'ls','2020-09-27 11:00:00'),
(3,'ww','2020-08-18 10:00:00'),
(4,'zl','2020-08-18 11:00:00');

select * from tb_merge_tree_partition;
┌─uid─┬─name─┬────────────birthday─┐
│   1 │ zs   │ 2020-09-27 10:00:00 │
│   2 │ ls   │ 2020-09-27 11:00:00 │
└─────┴──────┴─────────────────────┘
┌─uid─┬─name─┬────────────birthday─┐
│   3 │ ww   │ 2020-08-18 10:00:00 │
│   4 │ zl   │ 2020-08-18 11:00:00 │
└─────┴──────┴─────────────────────┘


insert into tb_merge_tree_partition values
(11,'zss','2020-09-27 10:00:00'),
(12,'lss','2020-09-27 11:00:00'),
(13,'www','2020-08-18 10:00:00'),
(14,'zll','2020-08-18 11:00:00');

select * from tb_merge_tree_partition;
┌─uid─┬─name─┬────────────birthday─┐
│   1 │ zs   │ 2020-09-27 10:00:00 │
│   2 │ ls   │ 2020-09-27 11:00:00 │
└─────┴──────┴─────────────────────┘
┌─uid─┬─name─┬────────────birthday─┐
│   3 │ ww   │ 2020-08-18 10:00:00 │
│   4 │ zl   │ 2020-08-18 11:00:00 │
└─────┴──────┴─────────────────────┘
┌─uid─┬─name─┬────────────birthday─┐
│  11 │ zss  │ 2020-09-27 10:00:00 │
│  12 │ lss  │ 2020-09-27 11:00:00 │
└─────┴──────┴─────────────────────┘
┌─uid─┬─name─┬────────────birthday─┐
│  13 │ www  │ 2020-08-18 10:00:00 │
│  14 │ zll  │ 2020-08-18 11:00:00 │
└─────┴──────┴─────────────────────┘

**看一下底层存储**
cd /data/clickhouse/data/test1/tb_merge_tree_partition
[root@spark-140 tb_merge_tree_partition]# ll
total 24
drwxr-x--- 2 root root 4096 Aug 30 19:58 20200818_2_2_0
drwxr-x--- 2 root root 4096 Aug 30 19:59 20200818_4_4_0
drwxr-x--- 2 root root 4096 Aug 30 19:58 20200927_1_1_0
drwxr-x--- 2 root root 4096 Aug 30 19:59 20200927_3_3_0
drwxr-x--- 2 root root 4096 Aug 30 19:58 detached
-rw-r----- 1 root root    1 Aug 30 19:58 format_version.txt


**合并操作**
optimize table tb_merge_tree_partition final;
select * from tb_merge_tree_partition;
┌─uid─┬─name─┬────────────birthday─┐
│   1 │ zs   │ 2020-09-27 10:00:00 │
│   2 │ ls   │ 2020-09-27 11:00:00 │
│  11 │ zss  │ 2020-09-27 10:00:00 │
│  12 │ lss  │ 2020-09-27 11:00:00 │
└─────┴──────┴─────────────────────┘
┌─uid─┬─name─┬────────────birthday─┐
│   3 │ ww   │ 2020-08-18 10:00:00 │
│   4 │ zl   │ 2020-08-18 11:00:00 │
│  13 │ www  │ 2020-08-18 10:00:00 │
│  14 │ zll  │ 2020-08-18 11:00:00 │
└─────┴──────┴─────────────────────┘

[root@spark-140 tb_merge_tree_partition]# ll
total 28
drwxr-x--- 2 root root 4096 Aug 30 19:58 20200818_2_2_0
drwxr-x--- 2 root root 4096 Aug 30 20:01 20200818_2_4_1
drwxr-x--- 2 root root 4096 Aug 30 19:59 20200818_4_4_0
drwxr-x--- 2 root root 4096 Aug 30 19:58 20200927_1_1_0
drwxr-x--- 2 root root 4096 Aug 30 20:02 20200927_1_3_1
drwxr-x--- 2 root root 4096 Aug 30 19:59 20200927_3_3_0
drwxr-x--- 2 root root 4096 Aug 30 19:58 detached
-rw-r----- 1 root root    1 Aug 30 19:58 format_version.txt
  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值