原始数据存放在 inner_person_t 这张表中
-- Inspect the raw source rows held in inner_person_t.
SELECT *
FROM inner_person_t;
方式1
1. 创建schema
文件名为:schema.avsc
内容如下:
{
  "type": "record",
  "name": "avroPeople",
  "namespace": "com.china",
  "fields": [
    {"name": "id", "type": "int"},
    {"name": "age", "type": "int"},
    {"name": "sex", "type": "string"}
  ]
}
2. 将schema文件上传到指定的HDFS目录
# Create the target directory first: when it is missing, -put either fails or
# writes a plain file named "avro_schema" instead of placing schema.avsc inside it.
hadoop fs -mkdir -p /user/xiangyongqiao15/hivedata/avro_schema
# Name the destination file explicitly so it is exactly the path that the
# table's avro.schema.url property points at.
hadoop fs -put schema.avsc /user/xiangyongqiao15/hivedata/avro_schema/schema.avsc
3. 创建表
-- Avro-backed Hive table. No column list is declared here; the table definition
-- points the AvroSerDe at the schema file given in avro.schema.url.
CREATE TABLE IF NOT EXISTS avro_people
    ROW FORMAT SERDE
        'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
    WITH SERDEPROPERTIES (
        'avro.schema.url' = '/user/xiangyongqiao15/hivedata/avro_schema/schema.avsc'
    )
    STORED AS
        INPUTFORMAT  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
        OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat';
4. 导入数据
-- Replace the contents of avro_people with every row of inner_person_t.
-- NOTE(review): SELECT * is matched to the target columns by position, so this
-- presumably relies on inner_person_t exposing (id, age, sex) in that order - confirm.
INSERT OVERWRITE TABLE avro_people
SELECT *
FROM inner_person_t;
5. 查看数据
-- Read the Avro table back to check the loaded rows.
SELECT *
FROM avro_people;
6. 添加字段时的注意事项
AVRO表新添加字段时,一定要在schema中给这个新字段设置默认值("default"):已有的数据文件是按旧schema写入的,其中没有这个字段,读取时Avro会用默认值来填充;如果新字段没有默认值,读取旧数据时就会报错。
{
  "type": "record",
  "name": "avroPeople",
  "namespace": "com.china",
  "fields": [
    {"name": "id", "type": "int"},
    {"name": "age", "type": "int"},
    {"name": "sex", "type": "string"},
    {"name": "country", "type": "string", "default": "China"}
  ]
}