Spark: Complex Types

sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java

While validating the requested schema, the vectorized Parquet reader rejects complex types:

for (int i = 0; i < requestedSchema.getFieldCount(); ++i) {
  Type t = requestedSchema.getFields().get(i);
  if (!t.isPrimitive() || t.isRepetition(Type.Repetition.REPEATED)) {
    throw new UnsupportedOperationException("Complex types not supported.");
  }
  // ... remaining per-column checks elided
}

isPrimitive comes from Parquet's schema Type API (org.apache.parquet.schema.Type): a requested field that is not a primitive type, or that has REPEATED repetition, is treated as a complex type and rejected.
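For reference, the same condition can be reproduced outside Spark by reading the Parquet footer with the parquet-mr API. This is a minimal sketch; the file name is a placeholder for one of the part files written in the next section:

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.parquet.hadoop.ParquetFileReader
import org.apache.parquet.hadoop.util.HadoopInputFile
import org.apache.parquet.schema.Type

// Placeholder path: substitute the actual part file produced by the write below.
val file = HadoopInputFile.fromPath(
  new Path("lparquet/part-00000.snappy.parquet"), new Configuration())
val reader = ParquetFileReader.open(file)
try {
  val schema = reader.getFooter.getFileMetaData.getSchema
  for (i <- 0 until schema.getFieldCount) {
    val t = schema.getType(i)
    // Same condition as VectorizedParquetRecordReader: a field counts as
    // "complex" if it is not primitive or has REPEATED repetition.
    val complex = !t.isPrimitive || t.isRepetition(Type.Repetition.REPEATED)
    println(s"${t.getName}: complex = $complex")
  }
} finally {
  reader.close()
}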

Generate Complex Data
spark.range(2).select(col("id"), expr("1").as("kind"),
        expr("array(1, 2)").as("arr_field"),
        expr("array(array(1, 2), array(3, 4))").as("arr_arr_field"),
        expr("array(struct(1, 2), struct(1, 2))").as("arr_struct_field"),
        expr("array(map(1, 2), map(3,4))").as("arr_map_field"),
        expr("struct(1, 2)").as("struct_field"),
        expr("struct(1, struct(1, 2))").as("struct_struct_field"),
        expr("struct(1, array(1, 2))").as("struct_array_field"),
        expr("map(1, 2)").as("map_field"),
        expr("map(1, map(3,4))").as("map_map_field"),
        expr("map(1, array(1, 2))").as("map_arr_field"),
        expr("map(struct(1, 2), 2)").as("map_struct_field"))
        .coalesce(1)
        .write
        .format("parquet")
        .mode("overwrite")
        .parquet("lparquet")

    spark.range(2).select(col("id"), expr("id % 2").as("kind"),
      expr("array(1, 2)").as("arr_field"),
      expr("struct(1, 2)").as("struct_field"))
        .coalesce(1)
        .write
        .format("parquet")
        .mode("overwrite")
        .parquet("rparquet")
        
spark.catalog.createTable("ltab", "lparquet", "arrow")
spark.catalog.createTable("rtab", "rparquet", "arrow")
val dfr=spark.range(2).select(col("id"), expr("id % 2").as("kind"),
        expr("array(array(1, 2), array(3, 4))").as("arr_arr_field"),
        expr("array(struct(1, 2), struct(1, 2))").as("arr_struct_field"),
        expr("array(map(1, 2), map(3,4))").as("arr_map_field"),
        expr("struct(1, struct(1, 2))").as("struct_struct_field"),
        expr("struct(1, array(1, 2))").as("struct_array_field"),
        expr("map(1, map(3,4))").as("map_map_field"),
        expr("map(1, array(1, 2))").as("map_arr_field"),
        expr("map(struct(1, 2), 2)").as("map_struct_field"))
dfr.createOrReplaceTempView("rtab")
spark.sql("SELECT * from rtab").show
spark.sql("SELECT * from rtab").printSchema

Output

scala> spark.sql("SELECT * from rtab").show
+---+----+----------------+----------------+--------------------+-------------------+------------------+---------------+-------------+----------------+
| id|kind|   arr_arr_field|arr_struct_field|       arr_map_field|struct_struct_field|struct_array_field|  map_map_field|map_arr_field|map_struct_field|
+---+----+----------------+----------------+--------------------+-------------------+------------------+---------------+-------------+----------------+
|  0|   0|[[1, 2], [3, 4]]|[{1, 2}, {1, 2}]|[{1 -> 2}, {3 -> 4}]|        {1, {1, 2}}|       {1, [1, 2]}|{1 -> {3 -> 4}}|{1 -> [1, 2]}|   {{1, 2} -> 2}|
|  1|   1|[[1, 2], [3, 4]]|[{1, 2}, {1, 2}]|[{1 -> 2}, {3 -> 4}]|        {1, {1, 2}}|       {1, [1, 2]}|{1 -> {3 -> 4}}|{1 -> [1, 2]}|   {{1, 2} -> 2}|
+---+----+----------------+----------------+--------------------+-------------------+------------------+---------------+-------------+----------------+


scala> spark.sql("SELECT * from rtab").printSchema
   def printSchema(level: Int): Unit   def printSchema(): Unit

scala> spark.sql("SELECT * from rtab").printSchema
root
 |-- id: long (nullable = false)
 |-- kind: long (nullable = true)
 |-- arr_arr_field: array (nullable = false)
 |    |-- element: array (containsNull = false)
 |    |    |-- element: integer (containsNull = false)
 |-- arr_struct_field: array (nullable = false)
 |    |-- element: struct (containsNull = false)
 |    |    |-- col1: integer (nullable = false)
 |    |    |-- col2: integer (nullable = false)
 |-- arr_map_field: array (nullable = false)
 |    |-- element: map (containsNull = false)
 |    |    |-- key: integer
 |    |    |-- value: integer (valueContainsNull = false)
 |-- struct_struct_field: struct (nullable = false)
 |    |-- col1: integer (nullable = false)
 |    |-- col2: struct (nullable = false)
 |    |    |-- col1: integer (nullable = false)
 |    |    |-- col2: integer (nullable = false)
 |-- struct_array_field: struct (nullable = false)
 |    |-- col1: integer (nullable = false)
 |    |-- col2: array (nullable = false)
 |    |    |-- element: integer (containsNull = false)
 |-- map_map_field: map (nullable = false)
 |    |-- key: integer
 |    |-- value: map (valueContainsNull = false)
 |    |    |-- key: integer
 |    |    |-- value: integer (valueContainsNull = false)
 |-- map_arr_field: map (nullable = false)
 |    |-- key: integer
 |    |-- value: array (valueContainsNull = false)
 |    |    |-- element: integer (containsNull = false)
 |-- map_struct_field: map (nullable = false)
 |    |-- key: struct
 |    |    |-- col1: integer (nullable = false)
 |    |    |-- col2: integer (nullable = false)
 |    |-- value: integer (valueContainsNull = false)
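
To confirm which Parquet reader path Spark actually takes for these schemas, one option is to read the files back and check the Batched flag on the FileScan node in the explain output. A minimal sketch, assuming the standard config spark.sql.parquet.enableVectorizedReader (and, on Spark 3.3+, spark.sql.parquet.enableNestedColumnVectorizedReader for nested columns):

// With the vectorized reader enabled, older Spark versions still report
// Batched: false for files containing nested columns and fall back to the
// row-based parquet-mr reader.
spark.conf.set("spark.sql.parquet.enableVectorizedReader", "true")
spark.read.parquet("lparquet").explain()

// With it disabled, the row-based reader is always used.
spark.conf.set("spark.sql.parquet.enableVectorizedReader", "false")
spark.read.parquet("lparquet").explain()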