clickhouse引擎--MergeTree引擎

最新推荐文章于 2024-05-21 14:52:28 发布

小陈菜奈-

最新推荐文章于 2024-05-21 14:52:28 发布

阅读量276

点赞数 2

分类专栏： Clickhouse 文章标签：数据库 database

本文链接：https://blog.csdn.net/nayomi927/article/details/120130232

版权

Clickhouse 专栏收录该内容

23 篇文章 2 订阅

订阅专栏

MergeTree引擎

一.MergeTree引擎

一.MergeTree引擎

MergeTree系列的表引擎是clickhouse数据存储功能的核心，它们提供了用于弹性和高性能数据检索的大多数功能：列存储，自定义分区，稀疏的主索引，辅助数据跳过索引等

主要特点：

存储按主键排序的数据（必须有order by（主键,…））
如果指定了分区键，则可以使用分区
数据复制支持
数据采样支持

create table tb_merge_tree(
uid UInt8,
name String,
birthday Date,
city String,
gender String
)engine=MergeTree()
primary key uid
order by (uid,birthday);

insert into tb_merge_tree values
(1,'zs',toDate(now()),'HZ','M'),
(2,'ls','2021-09-27','HZ','M'),
(3,'ww','2021-08-18','SH','F'),
(4,'zl','1999-09-27','JS','F');

select * from tb_merge_tree;
┌─uid─┬─name─┬───birthday─┬─city─┬─gender─┐
│   1 │ zs   │ 2021-08-30 │ HZ   │ M      │
│   2 │ ls   │ 2021-09-27 │ HZ   │ M      │
│   3 │ ww   │ 2021-08-18 │ SH   │ F      │
│   4 │ zl   │ 1999-09-27 │ JS   │ F      │
└─────┴──────┴────────────┴──────┴────────┘


**看一下底层存储**
[root@spark-140 test1]# cd tb_merge_tree/
[root@spark-140 tb_merge_tree]# ll
total 12
drwxr-x--- 2 root root 4096 Aug 30 17:18 all_1_1_0    //存着表的所有数据
drwxr-x--- 2 root root 4096 Aug 30 17:18 detached
-rw-r----- 1 root root    1 Aug 30 17:18 format_version.txt
[root@spark-140 tb_merge_tree]# cd all_1_1_0/
[root@spark-140 all_1_1_0]# ll
total 28
-rw-r----- 1 root root 187 Aug 30 17:18 checksums.txt   //数据安全校验
-rw-r----- 1 root root 109 Aug 30 17:18 columns.txt   //列信息文件，存放列字段信息
-rw-r----- 1 root root   1 Aug 30 17:18 count.txt   //记录当前数据的条数
-rw-r----- 1 root root 174 Aug 30 17:18 data.bin    //数据文件
-rw-r----- 1 root root 176 Aug 30 17:18 data.mrk3   //列字段标记文件
-rw-r----- 1 root root  10 Aug 30 17:18 default_compression_codec.txt
-rw-r----- 1 root root   2 Aug 30 17:18 primary.idx  //一级索引文件，用于存放稀疏索引
[root@spark-140 all_1_1_0]# cat count.txt4

insert into tb_merge_tree values
(1,'zs',toDate(now()),'HZ','M'),
(2,'ls','2021-09-27','HZ','M'),
(3,'ww','2021-08-18','SH','F'),
(4,'zl','1999-09-27','JS','F');                    //再次执行一次插入操作

select * from tb_merge_tree;
┌─uid─┬─name─┬───birthday─┬─city─┬─gender─┐
│   1 │ zs   │ 2021-08-30 │ HZ   │ M      │
│   2 │ ls   │ 2021-09-27 │ HZ   │ M      │
│   3 │ ww   │ 2021-08-18 │ SH   │ F      │
│   4 │ zl   │ 1999-09-27 │ JS   │ F      │
└─────┴──────┴────────────┴──────┴────────┘
┌─uid─┬─name─┬───birthday─┬─city─┬─gender─┐
│   1 │ zs   │ 2021-08-30 │ HZ   │ M      │
│   2 │ ls   │ 2021-09-27 │ HZ   │ M      │
│   3 │ ww   │ 2021-08-18 │ SH   │ F      │
│   4 │ zl   │ 1999-09-27 │ JS   │ F      │
└─────┴──────┴────────────┴──────┴────────┘
//主键没有唯一要求，是可以重复的


[root@spark-140 tb_merge_tree]# ll
total 16
drwxr-x--- 2 root root 4096 Aug 30 17:18 all_1_1_0
drwxr-x--- 2 root root 4096 Aug 30 17:26 all_2_2_0  //第二次插入操作的文件
drwxr-x--- 2 root root 4096 Aug 30 17:18 detached
-rw-r----- 1 root root    1 Aug 30 17:18 format_version.txt
[root@spark-140 tb_merge_tree]# cd all_2_2_0/
[root@spark-140 all_2_2_0]# ll
total 28
-rw-r----- 1 root root 187 Aug 30 17:26 checksums.txt
-rw-r----- 1 root root 109 Aug 30 17:26 columns.txt
-rw-r----- 1 root root   1 Aug 30 17:26 count.txt
-rw-r----- 1 root root 174 Aug 30 17:26 data.bin
-rw-r----- 1 root root 176 Aug 30 17:26 data.mrk3
-rw-r----- 1 root root  10 Aug 30 17:26 default_compression_codec.txt
-rw-r----- 1 root root   2 Aug 30 17:26 primary.idx
[root@spark-140 all_2_2_0]# cat count.txt
4


**合并这两个数据块**
optimize table tb_merge_tree [final];   //final关键字是强制合并的意思，用于多个表合并时

select * from tb_merge_tree;
┌─uid─┬─name─┬───birthday─┬─city─┬─gender─┐
│   1 │ zs   │ 2021-08-30 │ HZ   │ M      │
│   1 │ zs   │ 2021-08-30 │ HZ   │ M      │
│   2 │ ls   │ 2021-09-27 │ HZ   │ M      │
│   2 │ ls   │ 2021-09-27 │ HZ   │ M      │
│   3 │ ww   │ 2021-08-18 │ SH   │ F      │
│   3 │ ww   │ 2021-08-18 │ SH   │ F      │
│   4 │ zl   │ 1999-09-27 │ JS   │ F      │
│   4 │ zl   │ 1999-09-27 │ JS   │ F      │
└─────┴──────┴────────────┴──────┴────────┘
这时再看一下底层存储
[root@spark-140 tb_merge_tree]# ll
total 20
drwxr-x--- 2 root root 4096 Aug 30 17:18 all_1_1_0
drwxr-x--- 2 root root 4096 Aug 30 17:31 all_1_2_1  //这是合并操作的文件
drwxr-x--- 2 root root 4096 Aug 30 17:26 all_2_2_0
drwxr-x--- 2 root root 4096 Aug 30 17:18 detached
-rw-r----- 1 root root    1 Aug 30 17:18 format_version.txt
[root@spark-140 tb_merge_tree]# cd all_1_2_1/
//all_1_2_1数字分别代表什么   1代表最低版本，2代表最高版本，1代表合并操作一次
[root@spark-140 all_1_2_1]# ll
total 28-rw-r----- 1 root root 187 Aug 30 17:31 checksums.txt
-rw-r----- 1 root root 109 Aug 30 17:31 columns.txt
-rw-r----- 1 root root   1 Aug 30 17:31 count.txt
-rw-r----- 1 root root 205 Aug 30 17:31 data.bin
-rw-r----- 1 root root 176 Aug 30 17:31 data.mrk3  
-rw-r----- 1 root root  10 Aug 30 17:31 default_compression_codec.txt
-rw-r----- 1 root root   2 Aug 30 17:31 primary.idx
[root@spark-140 all_1_2_1]# cat count.txt
8


create table tb_merge_tree_partition(
uid UInt8,
name String,
birthday DateTime
)engine=MergeTree()
partition by toDate(birthday)
order by uid;

insert into tb_merge_tree_partition values
(1,'zs','2020-09-27 10:00:00'),
(2,'ls','2020-09-27 11:00:00'),
(3,'ww','2020-08-18 10:00:00'),
(4,'zl','2020-08-18 11:00:00');

select * from tb_merge_tree_partition;
┌─uid─┬─name─┬────────────birthday─┐
│   1 │ zs   │ 2020-09-27 10:00:00 │
│   2 │ ls   │ 2020-09-27 11:00:00 │
└─────┴──────┴─────────────────────┘
┌─uid─┬─name─┬────────────birthday─┐
│   3 │ ww   │ 2020-08-18 10:00:00 │
│   4 │ zl   │ 2020-08-18 11:00:00 │
└─────┴──────┴─────────────────────┘


insert into tb_merge_tree_partition values
(11,'zss','2020-09-27 10:00:00'),
(12,'lss','2020-09-27 11:00:00'),
(13,'www','2020-08-18 10:00:00'),
(14,'zll','2020-08-18 11:00:00');

select * from tb_merge_tree_partition;
┌─uid─┬─name─┬────────────birthday─┐
│   1 │ zs   │ 2020-09-27 10:00:00 │
│   2 │ ls   │ 2020-09-27 11:00:00 │
└─────┴──────┴─────────────────────┘
┌─uid─┬─name─┬────────────birthday─┐
│   3 │ ww   │ 2020-08-18 10:00:00 │
│   4 │ zl   │ 2020-08-18 11:00:00 │
└─────┴──────┴─────────────────────┘
┌─uid─┬─name─┬────────────birthday─┐
│  11 │ zss  │ 2020-09-27 10:00:00 │
│  12 │ lss  │ 2020-09-27 11:00:00 │
└─────┴──────┴─────────────────────┘
┌─uid─┬─name─┬────────────birthday─┐
│  13 │ www  │ 2020-08-18 10:00:00 │
│  14 │ zll  │ 2020-08-18 11:00:00 │
└─────┴──────┴─────────────────────┘

**看一下底层存储**
cd /data/clickhouse/data/test1/tb_merge_tree_partition
[root@spark-140 tb_merge_tree_partition]# ll
total 24
drwxr-x--- 2 root root 4096 Aug 30 19:58 20200818_2_2_0
drwxr-x--- 2 root root 4096 Aug 30 19:59 20200818_4_4_0
drwxr-x--- 2 root root 4096 Aug 30 19:58 20200927_1_1_0
drwxr-x--- 2 root root 4096 Aug 30 19:59 20200927_3_3_0
drwxr-x--- 2 root root 4096 Aug 30 19:58 detached
-rw-r----- 1 root root    1 Aug 30 19:58 format_version.txt


**合并操作**
optimize table tb_merge_tree_partition final;
select * from tb_merge_tree_partition;
┌─uid─┬─name─┬────────────birthday─┐
│   1 │ zs   │ 2020-09-27 10:00:00 │
│   2 │ ls   │ 2020-09-27 11:00:00 │
│  11 │ zss  │ 2020-09-27 10:00:00 │
│  12 │ lss  │ 2020-09-27 11:00:00 │
└─────┴──────┴─────────────────────┘
┌─uid─┬─name─┬────────────birthday─┐
│   3 │ ww   │ 2020-08-18 10:00:00 │
│   4 │ zl   │ 2020-08-18 11:00:00 │
│  13 │ www  │ 2020-08-18 10:00:00 │
│  14 │ zll  │ 2020-08-18 11:00:00 │
└─────┴──────┴─────────────────────┘

[root@spark-140 tb_merge_tree_partition]# ll
total 28
drwxr-x--- 2 root root 4096 Aug 30 19:58 20200818_2_2_0
drwxr-x--- 2 root root 4096 Aug 30 20:01 20200818_2_4_1
drwxr-x--- 2 root root 4096 Aug 30 19:59 20200818_4_4_0
drwxr-x--- 2 root root 4096 Aug 30 19:58 20200927_1_1_0
drwxr-x--- 2 root root 4096 Aug 30 20:02 20200927_1_3_1
drwxr-x--- 2 root root 4096 Aug 30 19:59 20200927_3_3_0
drwxr-x--- 2 root root 4096 Aug 30 19:58 detached
-rw-r----- 1 root root    1 Aug 30 19:58 format_version.txt

小陈菜奈-

关注

2
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
clickhouse引擎--MergeTree引擎

MergeTree引擎一.MergeTree引擎二级目录三级目录一.MergeTree引擎MergeTree系列的表引擎是clickhouse数据存储功能的核心，它们提供了用于弹性和高性能数据检索的大多数功能：列存储，自定义分区，稀疏的主索引，辅助数据跳过索引等二级目录三级目录MergeTree系列的表引擎是clickhouse数据存储功能的核心，它们提供了用于弹性和高性能数据检索的大多数功能：列存储，自定义分区，稀疏的主索引，辅助数据跳过索引等主要特点：1.存储按主键排序的数据（必须有ord
复制链接

扫一扫