# Import MySQL table `role` into HDFS as Parquet under /user/hive/warehouse/test/role7.
# The data is loaded into Hive afterwards with a separate LOAD DATA statement.
#
# - The JDBC URL is quoted: it contains `?`, a shell glob character, and an
#   unquoted URL would be rewritten if any file in the cwd matched the pattern.
# - `tinyInt1isBit=false` stops Connector/J from mapping tinyint(1) to boolean.
# - `--map-column-java` forces the datetime columns to String in the output schema.
# - `--delete-target-dir` makes the import idempotent (re-runs replace old output).
# - The field/line terminator flags are kept from the original invocation;
#   NOTE(review): with --as-parquetfile the output is columnar, so text
#   delimiters presumably have no effect — confirm before relying on them.
# NOTE(review): --password on the command line is visible in `ps` output and
# shell history; prefer --password-file or -P (interactive prompt).
sqoop import \
  --connect 'jdbc:mysql://testurl:3306/user?tinyInt1isBit=false' \
  --username user \
  --password user_passwd \
  --table role \
  --target-dir /user/hive/warehouse/test/role7 \
  --delete-target-dir \
  --outdir /tmp \
  --fields-terminated-by '\001' \
  --lines-terminated-by '\n' \
  --map-column-java create_time=String,update_time=String \
  --null-string '\\N' \
  --null-non-string '\\N' \
  --split-by id \
  --num-mappers 1 \
  --as-parquetfile
-- Load the Sqoop-produced Parquet files from HDFS into Hive table test.role7.
-- NOTE(review): Hive LOAD DATA ... INPATH moves (not copies) the files out of
-- the source directory — confirm this is intended before re-running the import.
load data inpath '/user/hive/warehouse/test/role7' into table test.role7;
问题记录
1 如果直接导入到hive,类型转换会失败
--map-column-java create_time=String,update_time=String
此选项不会生效
2 tinyint(1)会转换成boolean类型
?tinyInt1isBit=false可以解决
3 分隔符无特殊要求,最好使用默认的
--fields-terminated-by '\001'
--lines-terminated-by '\n'
4 desc formatted 显示的是否压缩,可以通过查看parquet压缩文件确认,生成的类型也可以查看
java -jar /tmp/parquet-tools-1.6.0rc3-SNAPSHOT.jar schema -d /tmp/7713c78e-5a45-40f4-a78b-afc16389d782.parquet
[root@hadoop102 tmp]# java -jar /tmp/parquet-tools-1.6.0rc3-SNAPSHOT.jar schema -d /tmp/7713c78e-5a45-40f4-a78b-afc16389d782.parquet
message role {
optional int32 id;
optional binary role_no (UTF8);
optional binary name (UTF8);
optional binary company_no (UTF8);
optional int32 role_type;
optional binary notes (UTF8);
optional int32 enable;
optional binary create_user_no (UTF8);
optional binary update_user_no (UTF8);
optional binary create_time (UTF8);
optional binary update_time (UTF8);
}
creator: parquet-mr version 1.5.0-cdh5.16.1 (build ${
buildNumber})
extra: parquet.avro.schema = {
"type":"record","name":"role","doc":"Sqoop import of role","fields":[{
"name":"id"