【iceberg数据一致性】iceberg如何保证高并发数据一致性

在使用iceberg写数据时,一直弄不清楚为什么iceberg写入快,并且能够保证数据的一致性。今天决定搞清楚这个问题,经过查询和理解,写下来。

文件格式

iceberg的元数据文件目前有三类:metadata.json、snap.avro、m*.avro,它们分别对应iceberg的元数据文件(metadata file)、清单列表文件(manifest list)和清单文件(manifest file)。对这些文件格式内容不清楚的同学可以自行查阅资料,这里不做详细说明。

文件内容的格式如下

metadata.json

{
  "format-version" : 2,
  "table-uuid" : "da07b515-b2ed-458c-8fc1-fd2650d0f48a",
  "location" : "s3a://wux-hoo-dev-01/ice_warehouse/p059_avenger/attr_vals",
  "last-sequence-number" : 566,
  "last-updated-ms" : 1715896148763,
  "last-column-id" : 9,
  "current-schema-id" : 0,
  "schemas" : [ {
    "type" : "struct",
    "schema-id" : 0,
    "fields" : [ {
      "id" : 1,
      "name" : "serial_num",
      "required" : false,
      "type" : "string"
    }, {
      "id" : 2,
      "name" : "trans_seq",
      "required" : false,
      "type" : "string"
    }, {
      "id" : 3,
      "name" : "attr_name",
      "required" : false,
      "type" : "string"
    }, {
      "id" : 4,
      "name" : "pre_attr_value",
      "required" : false,
      "type" : "string"
    }, {
      "id" : 5,
      "name" : "post_attr_value",
      "required" : false,
      "type" : "string"
    }, {
      "id" : 6,
      "name" : "in_run_file",
      "required" : false,
      "type" : "string"
    }, {
      "id" : 7,
      "name" : "event_date",
      "required" : false,
      "type" : "string"
    }, {
      "id" : 8,
      "name" : "family",
      "required" : false,
      "type" : "string"
    }, {
      "id" : 9,
      "name" : "operation",
      "required" : false,
      "type" : "string"
    } ]
  } ],
  "default-spec-id" : 0,
  "partition-specs" : [ {
    "spec-id" : 0,
    "fields" : [ {
      "name" : "event_date",
      "transform" : "identity",
      "source-id" : 7,
      "field-id" : 1000
    }, {
      "name" : "family",
      "transform" : "identity",
      "source-id" : 8,
      "field-id" : 1001
    }, {
      "name" : "operation",
      "transform" : "identity",
      "source-id" : 9,
      "field-id" : 1002
    } ]
  } ],
  "last-partition-id" : 1002,
  "default-sort-order-id" : 0,
  "sort-orders" : [ {
    "order-id" : 0,
    "fields" : [ ]
  } ],
  "properties" : {
    "owner" : "root",
    "write.metadata.delete-after-commit.enabled" : "true",
    "schema.name-mapping.default" : "[{ \"field-id\":1, \"names\": [\"serial_num\"] },{ \"field-id\":2, \"names\": [\"trans_seq\"] },{ \"field-id\":3, \"names\": [\"attr_name\"] },{ \"field-id\":4, \"names\": [\"pre_attr_value\"] },{ \"field-id\":5, \"names\": [\"post_attr_value\"] },{ \"field-id\":6, \"names\": [\"in_run_file\"] },{ \"field-id\":7, \"names\": [\"event_date\"] },{ \"field-id\":8, \"names\": [\"family\"] },{ \"field-id\":9, \"names\": [\"operation\"] }]",
    "write.metadata.previous-versions-max" : "3",
    "write.parquet.compression-codec" : "zstd"
  },
  "current-snapshot-id" : 4890912288753727730,
  "refs" : {
    "main" : {
      "snapshot-id" : 4890912288753727730,
      "type" : "branch"
    }
  },
  "snapshots" : [ {
    "sequence-number" : 566,
    "snapshot-id" : 4890912288753727730,
    "parent-snapshot-id" : 3610607578140763977,
    "timestamp-ms" : 1715892795791,
    "summary" : {
      "operation" : "replace",
      "snapshot.producer" : "OPTIMIZE",
      "added-data-files" : "1",
      "deleted-data-files" : "3",
      "added-records" : "8728",
      "deleted-records" : "8728",
      "added-files-size" : "29130",
      "removed-files-size" : "42114",
      "changed-partition-count" : "1",
      "total-records" : "1329523",
      "total-files-size" : "2771968",
      "total-data-files" : "39",
      "total-delete-files" : "0",
      "total-position-deletes" : "0",
      "total-equality-deletes" : "0"
    },
    "manifest-list" : "s3a://wux-hoo-dev-01/ice_warehouse/p059_avenger/attr_vals/metadata/snap-4890912288753727730-1-2b4564d4-e38e-4b5e-bf85-2660fa400b35.avro",
    "schema-id" : 0
  } ],
  "statistics" : [ ],
  "partition-statistics" : [ ],
  "snapshot-log" : [ {
    "timestamp-ms" : 1715892795791,
    "snapshot-id" : 4890912288753727730
  } ],
  "metadata-log" : [ {
    "timestamp-ms" : 1715889404796,
    "metadata-file" : "s3a://wux-hoo-dev-01/ice_warehouse/p059_avenger/attr_vals/metadata/00578-0268ca68-4e34-4717-9cb1-1540070cb374.metadata.json"
  }, {
    "timestamp-ms" : 1715892548263,
    "metadata-file" : "s3a://wux-hoo-dev-01/ice_warehouse/p059_avenger/attr_vals/metadata/00579-8f364707-92b4-4c36-8b19-4080c5fd6e69.metadata.json"
  }, {
    "timestamp-ms" : 1715892795791,
    "metadata-file" : "s3a://wux-hoo-dev-01/ice_warehouse/p059_avenger/attr_vals/metadata/00580-c9b10830-04e3-4bc8-b414-57c86eca7c0d.metadata.json"
  } ]
}

snap.avro

{
 "manifest_path": "s3://datalake/db1/orders/metadata/62acb3d7-e992-4cbc-8e41-58809fcacb3e.avro",
 "manifest_length": 6152,
 "added_snapshot_id": 8333017788700497002,
 "added_data_files_count": 1,
 "added_rows_count": 1,
 "deleted_rows_count": 0,
 "partitions": {
        "array": [ {
            "contains_null": false,
            "lower_bound": {
                "bytes": "¹Ô\\\\u0006\\\\u0000"
            },
            "upper_bound": {
                "bytes": "¹Ô\\\\u0006\\\\u0000"
            }
        } ]
    }
}

m*.avro

{
  "data_file" : {
    "file_path" : "s3://datalake/db1/orders/data/order_ts_hour=2023-03-07-08/0_0_0.parquet",
    "file_format" : "PARQUET",
    "block_size_in_bytes" : 67108864,
    "null_value_counts" : [],
    "lower_bounds" : {
      "array": [{
        "key": 1,
        "value": 123
      }]
    },
    "upper_bounds" : {
      "array": [{
        "key": 1,
        "value": 123
      }]
    }
  }
}

写入过程

在写入数据时,iceberg会先写入数据文件data.parquet,然后写m*.avro,再写snap.avro,然后是metadata.json,最后更新iceberg数据库中iceberg_tables的metadata_location字段。如此则完成一次完整写入。

高并发一致性

那么在高并发的时候,多个任务同时操作某张表时是如何运作的呢?其实这里iceberg采用了乐观锁的方法。在写入数据的时候有一个验证的操作,即在准备提交时会检查原文件和待写入文件的差异,如果存在本次写入数据以外的数据,则代表有其他操作已经更新了原文件,那么本次写入会取消,并重试写入的步骤,直到验证通过、写入完成。如此就保证了数据的一致性,并且也解决了HMS锁的问题。

kimi的回答

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值