[hudi] Testing common field types for Hudi tables via the SQL APIs

1. Flink SQL API

drop table my_db.hudi_type_flink;

CREATE TABLE my_db.hudi_type_flink(
	boolean_t boolean,
	tinyint_t tinyint,
	smallint_t smallint,
	integer_t integer,
	bigint_t bigint PRIMARY KEY NOT ENFORCED,
	float_t float,
	double_t double,
    decimal_t206 decimal(20,6),
	time_t time(3),
	date_t date,
	timestamp_t timestamp,
	string_t string,
	binary_t binary,
	array_t array<integer>,
	map_t map<string,string>
) WITH (
  'connector' = 'hudi',
  'path' = '/warehouse/my_db/hudi_type_flink',
  'table.type' = 'COPY_ON_WRITE'
);

insert into my_db.hudi_type_flink values(
true,
cast(8 as tinyint),
cast(8 as smallint),
8,
8,
8.8,
8.8,
88.88,
cast('18:01:01.666' as time),
cast('2020-01-01' as date),
current_timestamp,
'abc',
cast('abc' as binary),
array[1,2,3],
map['key','flink','value','connector1']
);

select * from my_db.hudi_type_flink;
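
As a quick follow-up check (assuming the insert above committed successfully), the declared column types and a few of the complex columns can be inspected from the Flink SQL client. The statements below are only an illustrative spot check, not part of the original test:

describe my_db.hudi_type_flink;

-- spot-check high-precision and nested columns (Flink array indexing is 1-based)
select decimal_t206, timestamp_t, array_t[1], map_t['key']
from my_db.hudi_type_flink
where bigint_t = 8;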

2. Spark SQL API

drop table my_db.hudi_type_spark;

CREATE TABLE my_db.hudi_type_spark(
boolean_t boolean,
int_t int,
long_t long,
float_t float,
double_t double,
decimal_t206 decimal(20,6),
date_t date,
timestamp_t timestamp,
string_t string,
binary_t binary,
array_t array<int>,
map_t map<string,string>
) USING hudi
LOCATION '/warehouse/my_db/hudi_type_spark'
 TBLPROPERTIES (
  type = 'cow',
  primaryKey = 'long_t',
  'hoodie.datasource.hive_sync.support_timestamp' = 'true'
);

insert into my_db.hudi_type_spark values(
true
,8
,cast(8 as long)
,cast(8.8 as float)
,cast(8.8 as double)
,88.88
,current_date
,current_timestamp
,'abc'
,cast('abc' as binary)
,array(1,2,3)
,map('key','flink','value','conn1')
);

select * from my_db.hudi_type_spark;
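
A similar spot check can be run on the Spark side; again this is an illustrative sketch rather than part of the original test (Spark array indexing is 0-based):

describe table my_db.hudi_type_spark;

select decimal_t206, timestamp_t, array_t[0], map_t['key']
from my_db.hudi_type_spark
where long_t = 8;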
3. Scala API

To create a Hudi table and define its fields from Scala, code along the following lines can be used (the option constants follow the older, pre-0.9 DataSourceWriteOptions naming):

```scala
import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.config.HoodieWriteConfig._
import org.apache.hudi.hive.MultiPartKeysValueExtractor
import org.apache.hudi.keygen.SimpleKeyGenerator
import org.apache.spark.sql.{SaveMode, SparkSession}

val spark = SparkSession.builder()
  .appName("Hudi Example")
  .master("local[2]")
  .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
  .config("spark.sql.hive.convertMetastoreParquet", "false")
  .config("spark.sql.hive.caseSensitiveInferenceMode", "INFER_ONLY")
  .enableHiveSupport()
  .getOrCreate()

import spark.implicits._

val tableName = "my_hudi_table"
val basePath = "/path/to/hudi/table"

val hudiOptions = Map[String, String](
  TABLE_TYPE_OPT_KEY -> MOR_TABLE_TYPE_OPT_VAL,                      // Merge-on-Read table
  RECORDKEY_FIELD_OPT_KEY -> "id",                                   // record key field
  PRECOMBINE_FIELD_OPT_KEY -> "timestamp",                           // precombine (ordering) field
  PARTITIONPATH_FIELD_OPT_KEY -> "partition",                        // partition path field
  KEYGENERATOR_CLASS_OPT_KEY -> classOf[SimpleKeyGenerator].getName, // id -> record key
  HIVE_STYLE_PARTITIONING_OPT_KEY -> "true",
  HIVE_PARTITION_FIELDS_OPT_KEY -> "partition",
  HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY -> classOf[MultiPartKeysValueExtractor].getName
)

// A small sample DataFrame with the three fields the table is built around.
val df = Seq(
  (1L, 1672531200000L, "2023-01-01"),
  (2L, 1672617600000L, "2023-01-02")
).toDF("id", "timestamp", "partition")

df.write
  .format("org.apache.hudi")
  .options(hudiOptions)
  .option(TABLE_NAME, tableName)
  .mode(SaveMode.Overwrite)
  .save(basePath)
```

This code creates a Hudi table named `my_hudi_table` and stores it under `/path/to/hudi/table`. The table is built around three fields: `id`, `timestamp` and `partition`, where `id` is the unique record identifier, `timestamp` records the write time and acts as the precombine field, and `partition` drives partitioning. `SimpleKeyGenerator` is used as the key generator, taking `id` as the record key, while `MultiPartKeysValueExtractor` extracts the partition values from the `partition` field for Hive sync. The table is written as a MOR (Merge on Read) table.
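
To tie this back to the SQL APIs above: once the Scala job has written data under /path/to/hudi/table, the same table can also be queried from Spark SQL (with the Hudi Spark SQL extensions enabled) by registering an external table over that location. This is a minimal sketch; the table name my_hudi_table_sql is an illustrative choice, not something created by the write job:

-- register an external Hudi table over the path written by the Scala job
create table my_hudi_table_sql using hudi location '/path/to/hudi/table';

-- snapshot query; `timestamp` and `partition` are quoted because they are reserved words
select id, `timestamp`, `partition` from my_hudi_table_sql;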
