Apache Hudi 表目录结构

Apache Hudi 表目录结构

本文记录 Hudi 的**一部分**表目录结构和文件格式,便于直观地理解 Hudi 的设计理念和表的组织方式。

数据由官网示例运行后产生。

一、COW 类型表目录结构

表名字为 stock_ticks_cow

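| Permission | Owner | Group | Size | Last Modified | Replication | Block Size | Name |
| --- | --- | --- | --- | --- | --- | --- | --- |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 16:20 | 0 | 0 B | .hoodie |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:45 | 0 | 0 B | 2018 |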

1.1 ?./tablename/.hoodie

| Permission | Owner | Group | Size | Last Modified | Replication | Block Size | Name |
| --- | --- | --- | --- | --- | --- | --- | --- |
| -rw-r--r-- | root | supergroup | 968 B | Oct 15 15:45 | 3 | 128 MB | 20201015074528.rollback |
| -rw-r--r-- | root | supergroup | 0 B | Oct 15 15:45 | 3 | 128 MB | 20201015074528.rollback.inflight |
| -rw-r--r-- | root | supergroup | 2.2 KB | Oct 15 15:45 | 3 | 128 MB | 20201015074529.commit |
| -rw-r--r-- | root | supergroup | 0 B | Oct 15 15:45 | 3 | 128 MB | 20201015074529.commit.requested |
| -rw-r--r-- | root | supergroup | 350 B | Oct 15 15:45 | 3 | 128 MB | 20201015074529.inflight |
| -rw-r--r-- | root | supergroup | 2.21 KB | Oct 15 16:20 | 3 | 128 MB | 20201015082021.commit |
| -rw-r--r-- | root | supergroup | 0 B | Oct 15 16:20 | 3 | 128 MB | 20201015082021.commit.requested |
| -rw-r--r-- | root | supergroup | 1.01 KB | Oct 15 16:20 | 3 | 128 MB | 20201015082021.inflight |
| -rw-r--r-- | root | supergroup | 213 B | Oct 15 15:36 | 3 | 128 MB | hoodie.properties |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:36 | 0 | 0 B | .aux |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 16:20 | 0 | 0 B | .temp |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:36 | 0 | 0 B | archived |

1.1.1 *.rollback 文件格式
Objavro.schemaÚ
{"type":"record","name":"HoodieRollbackMetadata","namespace":"org.apache.hudi.avro.model","fields":[{"name":"startRollbackTime","type":{"type":"string","avro.java.string":"String"}},{"name":"timeTakenInMillis","type":"long"},{"name":"totalFilesDeleted","type":"int"},{"name":"commitsRollback","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}},{"name":"partitionMetadata","type":{"type":"map","values":{"type":"record","name":"HoodieRollbackPartitionMetadata","fields":[{"name":"partitionPath","type":{"type":"string","avro.java.string":"String"}},{"name":"successDeleteFiles","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}},{"name":"failedDeleteFiles","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}}]},"avro.java.string":"String"}},{"name":"version","type":["int","null"],"default":1}]}

1.1.2 *.rollback.inflight 文件格式

暂无复现场景

1.1.3 *.commit 文件格式
{
  "partitionToWriteStats" : {
    "2018/08/31" : [ {
      "fileId" : "8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0",
      "path" : "2018/08/31/8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0_0-22-22_20201015074529.parquet",
      "prevCommit" : "null",
      "numWrites" : 197,
      "numDeletes" : 0,
      "numUpdateWrites" : 0,
      "numInserts" : 197,
      "totalWriteBytes" : 443701,
      "totalWriteErrors" : 0,
      "tempPath" : null,
      "partitionPath" : "2018/08/31",
      "totalLogRecords" : 0,
      "totalLogFilesCompacted" : 0,
      "totalLogSizeCompacted" : 0,
      "totalUpdatedRecordsCompacted" : 0,
      "totalLogBlocks" : 0,
      "totalCorruptLogBlock" : 0,
      "totalRollbackBlocks" : 0,
      "fileSizeInBytes" : 443701
    } ]
  },
  "compacted" : false,
  "extraMetadata" : {
    "ROLLING_STAT" : "{\n  \"partitionToRollingStats\" : {\n    \"2018/08/31\" : {\n      \"8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0\" : {\n        \"fileId\" : \"8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0\",\n        \"inserts\" : 197,\n        \"upserts\" : 0,\n        \"deletes\" : 0,\n        \"totalInputWriteBytesToDisk\" : 0,\n        \"totalInputWriteBytesOnDisk\" : 443701\n      }\n    }\n  },\n  \"actionType\" : \"commit\"\n}",
    "schema" : "{\"type\":\"record\",\"name\":\"stock_ticks\",\"fields\":[{\"name\":\"volume\",\"type\":\"long\"},{\"name\":\"ts\",\"type\":\"string\"},{\"name\":\"symbol\",\"type\":\"string\"},{\"name\":\"year\",\"type\":\"int\"},{\"name\":\"month\",\"type\":\"string\"},{\"name\":\"high\",\"type\":\"double\"},{\"name\":\"low\",\"type\":\"double\"},{\"name\":\"key\",\"type\":\"string\"},{\"name\":\"date\",\"type\":\"string\"},{\"name\":\"close\",\"type\":\"double\"},{\"name\":\"open\",\"type\":\"double\"},{\"name\":\"day\",\"type\":\"string\"}]}",
    "deltastreamer.checkpoint.key" : "stock_ticks,0:3482"
  },
  "fileIdAndRelativePaths" : {
    "8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0" : "2018/08/31/8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0_0-22-22_20201015074529.parquet"
  },
  "totalRecordsDeleted" : 0,
  "totalLogRecordsCompacted" : 0,
  "totalScanTime" : 0,
  "totalCreateTime" : 793,
  "totalUpsertTime" : 0,
  "totalCompactedRecordsUpdated" : 0,
  "totalLogFilesCompacted" : 0,
  "totalLogFilesSize" : 0
}
1.1.4 *.commit.requested 文件格式

暂无复现场景

1.1.5 *.inflight 文件格式
{
  "partitionToWriteStats" : {
    "2018/08/31" : [ {
      "fileId" : "8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0",
      "path" : null,
      "prevCommit" : "20201015074529",
      "numWrites" : 0,
      "numDeletes" : 0,
      "numUpdateWrites" : 99,
      "numInserts" : 0,
      "totalWriteBytes" : 0,
      "totalWriteErrors" : 0,
      "tempPath" : null,
      "partitionPath" : null,
      "totalLogRecords" : 0,
      "totalLogFilesCompacted" : 0,
      "totalLogSizeCompacted" : 0,
      "totalUpdatedRecordsCompacted" : 0,
      "totalLogBlocks" : 0,
      "totalCorruptLogBlock" : 0,
      "totalRollbackBlocks" : 0,
      "fileSizeInBytes" : 0
    } ]
  },
  "compacted" : false,
  "extraMetadata" : { },
  "totalScanTime" : 0,
  "totalCreateTime" : 0,
  "totalUpsertTime" : 0,
  "totalCompactedRecordsUpdated" : 0,
  "totalLogFilesCompacted" : 0,
  "totalLogFilesSize" : 0,
  "fileIdAndRelativePaths" : {
    "8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0" : null
  },
  "totalRecordsDeleted" : 0,
  "totalLogRecordsCompacted" : 0
}
1.1.6 hoodie.properties 文件格式
#Properties saved on Thu Oct 15 07:36:26 UTC 2020
#Thu Oct 15 07:36:26 UTC 2020
hoodie.table.name=stock_ticks_cow
hoodie.archivelog.folder=archived
hoodie.table.type=COPY_ON_WRITE
hoodie.timeline.layout.version=1

1.2 ?/tablename/part-n/pn-n/pn-n-n…

| Permission | Owner | Group | Size | Last Modified | Replication | Block Size | Name |
| --- | --- | --- | --- | --- | --- | --- | --- |
| -rw-r--r-- | root | supergroup | 93 B | Oct 15 15:45 | 3 | 128 MB | .hoodie_partition_metadata |
| -rw-r--r-- | root | supergroup | 433.3 KB | Oct 15 15:45 | 3 | 128 MB | 8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0_0-22-22_20201015074529.parquet |
| -rw-r--r-- | root | supergroup | 433.01 KB | Oct 15 16:20 | 3 | 128 MB | 8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0_0-22-25_20201015082021.parquet |

1.2.1 .hoodie_partition_metadata 文件格式
#partition metadata
#Thu Oct 15 07:45:31 UTC 2020
commitTime=20201015074529
partitionDepth=3

1.2.2 *.parquet 文件格式
5041 5231 1504 1524 154c 4c15 0215 0400
001f 8b08 0000 0000 0000 00e3 6360 6030
3230 3230 3430 3435 3037 3135 b204 0078
5454 0112 0000 0015 0015 1415 382c 158a
0315 0415 0615 081c 180e 3230 3230 3130
3135 3037 3435 3239 180e 3230 3230 3130
3135 3037 3435 3239 1600 280e 3230 3230
3130 3135 3037 3435 3239 180e 3230 3230
......

二、MOR 类型表目录结构

表名字为 stock_ticks_mor

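| Permission | Owner | Group | Size | Last Modified | Replication | Block Size | Name |
| --- | --- | --- | --- | --- | --- | --- | --- |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 16:20 | 0 | 0 B | .hoodie |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:45 | 0 | 0 B | 2018 |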

2.1 ?/tablename/.hoodie

| Permission | Owner | Group | Size | Last Modified | Replication | Block Size | Name |
| --- | --- | --- | --- | --- | --- | --- | --- |
| -rw-r--r-- | root | supergroup | 968 B | Oct 15 15:45 | 3 | 128 MB | 20201015074553.rollback |
| -rw-r--r-- | root | supergroup | 0 B | Oct 15 15:45 | 3 | 128 MB | 20201015074553.rollback.inflight |
| -rw-r--r-- | root | supergroup | 2.21 KB | Oct 15 15:45 | 3 | 128 MB | 20201015074554.deltacommit |
| -rw-r--r-- | root | supergroup | 350 B | Oct 15 15:45 | 3 | 128 MB | 20201015074554.deltacommit.inflight |
| -rw-r--r-- | root | supergroup | 0 B | Oct 15 15:45 | 3 | 128 MB | 20201015074554.deltacommit.requested |
| -rw-r--r-- | root | supergroup | 2.26 KB | Oct 15 16:20 | 3 | 128 MB | 20201015082051.deltacommit |
| -rw-r--r-- | root | supergroup | 1.01 KB | Oct 15 16:20 | 3 | 128 MB | 20201015082051.deltacommit.inflight |
| -rw-r--r-- | root | supergroup | 0 B | Oct 15 16:20 | 3 | 128 MB | 20201015082051.deltacommit.requested |
| -rw-r--r-- | root | supergroup | 305 B | Oct 15 15:37 | 3 | 128 MB | hoodie.properties |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:37 | 0 | 0 B | .aux |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:45 | 0 | 0 B | .temp |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:37 | 0 | 0 B | archived |

2.1.1 *.rollback 文件格式
Objavro.schemaÚ
{"type":"record","name":"HoodieRollbackMetadata","namespace":"org.apache.hudi.avro.model","fields":[{"name":"startRollbackTime","type":{"type":"string","avro.java.string":"String"}},{"name":"timeTakenInMillis","type":"long"},{"name":"totalFilesDeleted","type":"int"},{"name":"commitsRollback","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}},{"name":"partitionMetadata","type":{"type":"map","values":{"type":"record","name":"HoodieRollbackPartitionMetadata","fields":[{"name":"partitionPath","type":{"type":"string","avro.java.string":"String"}},{"name":"successDeleteFiles","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}},{"name":"failedDeleteFiles","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}}]},"avro.java.string":"String"}},{"name":"version","type":["int","null"],"default":1}]}
2.1.2 *.rollback.inflight 文件格式

暂无复现场景

2.1.3 *.deltacommit 文件格式
{
  "partitionToWriteStats" : {
    "2018/08/31" : [ {
      "fileId" : "c7922a25-5d97-4add-8580-127fd14aa494-0",
      "path" : "2018/08/31/c7922a25-5d97-4add-8580-127fd14aa494-0_0-22-22_20201015074554.parquet",
      "prevCommit" : "null",
      "numWrites" : 197,
      "numDeletes" : 0,
      "numUpdateWrites" : 0,
      "numInserts" : 197,
      "totalWriteBytes" : 443699,
      "totalWriteErrors" : 0,
      "tempPath" : null,
      "partitionPath" : "2018/08/31",
      "totalLogRecords" : 0,
      "totalLogFilesCompacted" : 0,
      "totalLogSizeCompacted" : 0,
      "totalUpdatedRecordsCompacted" : 0,
      "totalLogBlocks" : 0,
      "totalCorruptLogBlock" : 0,
      "totalRollbackBlocks" : 0,
      "fileSizeInBytes" : 443699
    } ]
  },
  "compacted" : false,
  "extraMetadata" : {
    "ROLLING_STAT" : "{\n  \"partitionToRollingStats\" : {\n    \"2018/08/31\" : {\n      \"c7922a25-5d97-4add-8580-127fd14aa494-0\" : {\n        \"fileId\" : \"c7922a25-5d97-4add-8580-127fd14aa494-0\",\n        \"inserts\" : 197,\n        \"upserts\" : 0,\n        \"deletes\" : 0,\n        \"totalInputWriteBytesToDisk\" : 0,\n        \"totalInputWriteBytesOnDisk\" : 443699\n      }\n    }\n  },\n  \"actionType\" : \"deltacommit\"\n}",
    "schema" : "{\"type\":\"record\",\"name\":\"stock_ticks\",\"fields\":[{\"name\":\"volume\",\"type\":\"long\"},{\"name\":\"ts\",\"type\":\"string\"},{\"name\":\"symbol\",\"type\":\"string\"},{\"name\":\"year\",\"type\":\"int\"},{\"name\":\"month\",\"type\":\"string\"},{\"name\":\"high\",\"type\":\"double\"},{\"name\":\"low\",\"type\":\"double\"},{\"name\":\"key\",\"type\":\"string\"},{\"name\":\"date\",\"type\":\"string\"},{\"name\":\"close\",\"type\":\"double\"},{\"name\":\"open\",\"type\":\"double\"},{\"name\":\"day\",\"type\":\"string\"}]}",
    "deltastreamer.checkpoint.key" : "stock_ticks,0:3482"
  },
  "fileIdAndRelativePaths" : {
    "c7922a25-5d97-4add-8580-127fd14aa494-0" : "2018/08/31/c7922a25-5d97-4add-8580-127fd14aa494-0_0-22-22_20201015074554.parquet"
  },
  "totalRecordsDeleted" : 0,
  "totalLogRecordsCompacted" : 0,
  "totalScanTime" : 0,
  "totalCreateTime" : 1280,
  "totalUpsertTime" : 0,
  "totalCompactedRecordsUpdated" : 0,
  "totalLogFilesCompacted" : 0,
  "totalLogFilesSize" : 0
}
2.1.4 *.deltacommit.inflight 文件格式
{
  "partitionToWriteStats" : { },
  "compacted" : false,
  "extraMetadata" : { },
  "fileIdAndRelativePaths" : { },
  "totalRecordsDeleted" : 0,
  "totalLogRecordsCompacted" : 0,
  "totalScanTime" : 0,
  "totalCreateTime" : 0,
  "totalUpsertTime" : 0,
  "totalCompactedRecordsUpdated" : 0,
  "totalLogFilesCompacted" : 0,
  "totalLogFilesSize" : 0
}
2.1.5 *.deltacommit.requested 文件格式

暂无复现场景

2.1.6 hoodie.properties 文件格式
#Properties saved on Thu Oct 15 07:37:05 UTC 2020
#Thu Oct 15 07:37:05 UTC 2020
hoodie.compaction.payload.class=org.apache.hudi.common.model.OverwriteWithLatestAvroPayload
hoodie.table.name=stock_ticks_mor
hoodie.archivelog.folder=archived
hoodie.table.type=MERGE_ON_READ
hoodie.timeline.layout.version=1


2.2 ?/${tablename}/分区n/分区n-n/分区n-n-n/…

| Permission | Owner | Group | Size | Last Modified | Replication | Block Size | Name |
| --- | --- | --- | --- | --- | --- | --- | --- |
| -rw-r--r-- | root | supergroup | 21.04 KB | Oct 15 16:20 | 3 | 512 MB | .c7922a25-5d97-4add-8580-127fd14aa494-0_20201015074554.log.1_0-22-25 |
| -rw-r--r-- | root | supergroup | 93 B | Oct 15 15:45 | 3 | 128 MB | .hoodie_partition_metadata |
| -rw-r--r-- | root | supergroup | 433.3 KB | Oct 15 15:45 | 3 | 128 MB | c7922a25-5d97-4add-8580-127fd14aa494-0_0-22-22_20201015074554.parquet |

2.2.1 *.log.* 文件格式
2348 5544 4923 0000 0000 0000 541e 0000
0001 0000 0003 0000 0002 0000 0000 0000
000e 3230 3230 3130 3135 3038 3230 3531
......
2.2.2 .hoodie_partition_metadata 文件格式
#partition metadata
#Thu Oct 15 07:45:56 UTC 2020
commitTime=20201015074554
partitionDepth=3
2.2.3 *.parquet 文件格式
5041 5231 1504 1524 154c 4c15 0215 0400
001f 8b08 0000 0000 0000 00e3 6360 6030
3230 3230 3430 3435 3037 3135 3501 0002
bea4 3012 0000 0015 0015 1415 382c 158a
......
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值