下面查看 format-version 为 2（V2 格式）时 Iceberg 表的元数据格式。
创建一张表
-- Iceberg table created with format version 2 and partitioned by `category`.
CREATE TABLE local.db.sampleV2 (
    id       BIGINT,
    data     STRING,
    category STRING
)
USING iceberg
PARTITIONED BY (category)
TBLPROPERTIES ('format-version' = '2');
查看建表后生成的表元数据文件 v1.metadata.json:
{
"format-version" : 2,
"table-uuid" : "5c786a06-aeec-4559-9b3d-79687d82a809",
"location" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2",
"last-sequence-number" : 0,
"last-updated-ms" : 1642173468635,
"last-column-id" : 3,
"current-schema-id" : 0,
"schemas" : [ {
"type" : "struct",
"schema-id" : 0,
"fields" : [ {
"id" : 1,
"name" : "id",
"required" : false,
"type" : "long"
}, {
"id" : 2,
"name" : "data",
"required" : false,
"type" : "string"
}, {
"id" : 3,
"name" : "category",
"required" : false,
"type" : "string"
} ]
} ],
"default-spec-id" : 0,
"partition-specs" : [ {
"spec-id" : 0,
"fields" : [ {
"name" : "category",
"transform" : "identity",
"source-id" : 3,
"field-id" : 1000
} ]
} ],
"last-partition-id" : 1000,
"default-sort-order-id" : 0,
"sort-orders" : [ {
"order-id" : 0,
"fields" : [ ]
} ],
"properties" : {
"owner" : "liliwei"
},
"current-snapshot-id" : -1,
"snapshots" : [ ],
"snapshot-log" : [ ],
"metadata-log" : [ ]
}
插入数据:
-- Append one row; values are positional: id = 1, data = 'a', category = '1'.
INSERT INTO local.db.sampleV2
VALUES (1, 'a', '1');
查看目录结构，并用 avro-tools 查看 manifest list 文件:
(base) ➜ metadata tree -l
.
├── 2c7688ff-595a-4d9e-ba36-7fd68e70500f-m0.avro
├── snap-1504400791559924261-1-2c7688ff-595a-4d9e-ba36-7fd68e70500f.avro
├── v1.metadata.json
├── v2.metadata.json
└── version-hint.text
0 directories, 5 files
java -jar ~/plat/tools/avro-tools-1.10.2.jar tojson snap-1504400791559924261-1-2c7688ff-595a-4d9e-ba36-7fd68e70500f.avro
{
"manifest_path": "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/2c7688ff-595a-4d9e-ba36-7fd68e70500f-m0.avro",
"manifest_length": 6833,
"partition_spec_id": 0,
"content": 0,
"sequence_number": 1,
"min_sequence_number": 1,
"added_snapshot_id": 1504400791559924261,
"added_data_files_count": 1,
"existing_data_files_count": 0,
"deleted_data_files_count": 0,
"added_rows_count": 1,
"existing_rows_count": 0,
"deleted_rows_count": 0,
"partitions": {
"array": [{
"contains_null": false,
"contains_nan": {
"boolean": false
},
"lower_bound": {
"bytes": "1"
},
"upper_bound": {
"bytes": "1"
}
}]
}
}
进行分区变更（新增分区字段 data）:
-- Add `data` as an additional partition field of the table.
ALTER TABLE local.db.sampleV2
    ADD PARTITION FIELD data;
查看目录结构 :
(base) ➜ metadata tree -l
.
├── 2c7688ff-595a-4d9e-ba36-7fd68e70500f-m0.avro
├── snap-1504400791559924261-1-2c7688ff-595a-4d9e-ba36-7fd68e70500f.avro
├── v1.metadata.json
├── v2.metadata.json
├── v3.metadata.json
└── version-hint.text
0 directories, 6 files
查看 v3.metadata.json 的内容:
{
"format-version" : 2,
"table-uuid" : "5c786a06-aeec-4559-9b3d-79687d82a809",
"location" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2",
"last-sequence-number" : 1,
"last-updated-ms" : 1642176605638,
"last-column-id" : 3,
"current-schema-id" : 0,
"schemas" : [ {
"type" : "struct",
"schema-id" : 0,
"fields" : [ {
"id" : 1,
"name" : "id",
"required" : false,
"type" : "long"
}, {
"id" : 2,
"name" : "data",
"required" : false,
"type" : "string"
}, {
"id" : 3,
"name" : "category",
"required" : false,
"type" : "string"
} ]
} ],
"default-spec-id" : 1,
"partition-specs" : [ {
"spec-id" : 0,
"fields" : [ {
"name" : "category",
"transform" : "identity",
"source-id" : 3,
"field-id" : 1000
} ]
}, {
"spec-id" : 1,
"fields" : [ {
"name" : "category",
"transform" : "identity",
"source-id" : 3,
"field-id" : 1000
}, {
"name" : "data",
"transform" : "identity",
"source-id" : 2,
"field-id" : 1001
} ]
} ],
"last-partition-id" : 1001,
"default-sort-order-id" : 0,
"sort-orders" : [ {
"order-id" : 0,
"fields" : [ ]
} ],
"properties" : {
"owner" : "liliwei"
},
"current-snapshot-id" : 1504400791559924261,
"snapshots" : [ {
"sequence-number" : 1,
"snapshot-id" : 1504400791559924261,
"timestamp-ms" : 1642176476606,
"summary" : {
"operation" : "append",
"spark.app.id" : "local-1642173017469",
"added-data-files" : "1",
"added-records" : "1",
"added-files-size" : "874",
"changed-partition-count" : "1",
"total-records" : "1",
"total-files-size" : "874",
"total-data-files" : "1",
"total-delete-files" : "0",
"total-position-deletes" : "0",
"total-equality-deletes" : "0"
},
"manifest-list" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/snap-1504400791559924261-1-2c7688ff-595a-4d9e-ba36-7fd68e70500f.avro",
"schema-id" : 0
} ],
"snapshot-log" : [ {
"timestamp-ms" : 1642176476606,
"snapshot-id" : 1504400791559924261
} ],
"metadata-log" : [ {
"timestamp-ms" : 1642173468635,
"metadata-file" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/v1.metadata.json"
}, {
"timestamp-ms" : 1642176476606,
"metadata-file" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/v2.metadata.json"
} ]
}
插入数据:
-- Append one row; values are positional: id = 2, data = 'b', category = '2'.
INSERT INTO local.db.sampleV2
VALUES (2, 'b', '2');
查看目录结构:
(base) ➜ metadata tree -l
.
├── 2c7688ff-595a-4d9e-ba36-7fd68e70500f-m0.avro
├── 3f59d998-6448-4d83-9dcb-5ceb5c5d1f7d-m0.avro
├── snap-1504400791559924261-1-2c7688ff-595a-4d9e-ba36-7fd68e70500f.avro
├── snap-506027699712535420-1-3f59d998-6448-4d83-9dcb-5ceb5c5d1f7d.avro
├── v1.metadata.json
├── v2.metadata.json
├── v3.metadata.json
├── v4.metadata.json
└── version-hint.text
0 directories, 9 files
查看 v4.metadata.json 的内容:
{
"format-version" : 2,
"table-uuid" : "5c786a06-aeec-4559-9b3d-79687d82a809",
"location" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2",
"last-sequence-number" : 2,
"last-updated-ms" : 1642176734997,
"last-column-id" : 3,
"current-schema-id" : 0,
"schemas" : [ {
"type" : "struct",
"schema-id" : 0,
"fields" : [ {
"id" : 1,
"name" : "id",
"required" : false,
"type" : "long"
}, {
"id" : 2,
"name" : "data",
"required" : false,
"type" : "string"
}, {
"id" : 3,
"name" : "category",
"required" : false,
"type" : "string"
} ]
} ],
"default-spec-id" : 1,
"partition-specs" : [ {
"spec-id" : 0,
"fields" : [ {
"name" : "category",
"transform" : "identity",
"source-id" : 3,
"field-id" : 1000
} ]
}, {
"spec-id" : 1,
"fields" : [ {
"name" : "category",
"transform" : "identity",
"source-id" : 3,
"field-id" : 1000
}, {
"name" : "data",
"transform" : "identity",
"source-id" : 2,
"field-id" : 1001
} ]
} ],
"last-partition-id" : 1001,
"default-sort-order-id" : 0,
"sort-orders" : [ {
"order-id" : 0,
"fields" : [ ]
} ],
"properties" : {
"owner" : "liliwei"
},
"current-snapshot-id" : 506027699712535420,
"snapshots" : [ {
"sequence-number" : 1,
"snapshot-id" : 1504400791559924261,
"timestamp-ms" : 1642176476606,
"summary" : {
"operation" : "append",
"spark.app.id" : "local-1642173017469",
"added-data-files" : "1",
"added-records" : "1",
"added-files-size" : "874",
"changed-partition-count" : "1",
"total-records" : "1",
"total-files-size" : "874",
"total-data-files" : "1",
"total-delete-files" : "0",
"total-position-deletes" : "0",
"total-equality-deletes" : "0"
},
"manifest-list" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/snap-1504400791559924261-1-2c7688ff-595a-4d9e-ba36-7fd68e70500f.avro",
"schema-id" : 0
}, {
"sequence-number" : 2,
"snapshot-id" : 506027699712535420,
"parent-snapshot-id" : 1504400791559924261,
"timestamp-ms" : 1642176734997,
"summary" : {
"operation" : "append",
"spark.app.id" : "local-1642173017469",
"added-data-files" : "1",
"added-records" : "1",
"added-files-size" : "874",
"changed-partition-count" : "1",
"total-records" : "2",
"total-files-size" : "1748",
"total-data-files" : "2",
"total-delete-files" : "0",
"total-position-deletes" : "0",
"total-equality-deletes" : "0"
},
"manifest-list" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/snap-506027699712535420-1-3f59d998-6448-4d83-9dcb-5ceb5c5d1f7d.avro",
"schema-id" : 0
} ],
"snapshot-log" : [ {
"timestamp-ms" : 1642176476606,
"snapshot-id" : 1504400791559924261
}, {
"timestamp-ms" : 1642176734997,
"snapshot-id" : 506027699712535420
} ],
"metadata-log" : [ {
"timestamp-ms" : 1642173468635,
"metadata-file" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/v1.metadata.json"
}, {
"timestamp-ms" : 1642176476606,
"metadata-file" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/v2.metadata.json"
}, {
"timestamp-ms" : 1642176605638,
"metadata-file" : "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/v3.metadata.json"
} ]
}
查看新快照的 manifest list 文件（snap-506027699712535420-1-3f59d998-6448-4d83-9dcb-5ceb5c5d1f7d.avro）:
{
"manifest_path": "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/3f59d998-6448-4d83-9dcb-5ceb5c5d1f7d-m0.avro",
"manifest_length": 7036,
"partition_spec_id": 1,
"content": 0,
"sequence_number": 2,
"min_sequence_number": 2,
"added_snapshot_id": 506027699712535420,
"added_data_files_count": 1,
"existing_data_files_count": 0,
"deleted_data_files_count": 0,
"added_rows_count": 1,
"existing_rows_count": 0,
"deleted_rows_count": 0,
"partitions": {
"array": [{
"contains_null": false,
"contains_nan": {
"boolean": false
},
"lower_bound": {
"bytes": "2"
},
"upper_bound": {
"bytes": "2"
}
}, {
"contains_null": false,
"contains_nan": {
"boolean": false
},
"lower_bound": {
"bytes": "b"
},
"upper_bound": {
"bytes": "b"
}
}]
}
}
{
"manifest_path": "/Users/liliwei/plat/spark-3.1.2-bin-hadoop3.2/warehouse/db/sampleV2/metadata/2c7688ff-595a-4d9e-ba36-7fd68e70500f-m0.avro",
"manifest_length": 6833,
"partition_spec_id": 0,
"content": 0,
"sequence_number": 1,
"min_sequence_number": 1,
"added_snapshot_id": 1504400791559924261,
"added_data_files_count": 1,
"existing_data_files_count": 0,
"deleted_data_files_count": 0,
"added_rows_count": 1,
"existing_rows_count": 0,
"deleted_rows_count": 0,
"partitions": {
"array": [{
"contains_null": false,
"contains_nan": {
"boolean": false
},
"lower_bound": {
"bytes": "1"
},
"upper_bound": {
"bytes": "1"
}
}]
}
}