Hive分区表导入txt文件
- txt文件数据如下所示
1351698**** 196037895 59.28% 25.74 659.02 419 50 语音类:干扰类:覆盖类 baf4d04:baf4d07:baf4d01 196037892:196037895:196037889 20200527
1358868**** 190464769 44.87% 22.52 646.7 887 49 覆盖类:干扰类:语音类 4ee71a1:b5a4303:b5a4301 82735521:190464771:190464769 20200527
2.创建临时表ods_zl_text_cem_bad_family_day_temp
-- Staging table matching the raw txt layout, one column per tab-separated field.
-- NOTE(review): in the sample rows the 11th (last) field is the day key
-- (e.g. 20200527). The original DDL declared p_day BEFORE col10, which loaded
-- the ':'-separated id list into p_day and the date into col10 — the later
-- dynamic-partition insert then partitioned on the id list. p_day is therefore
-- moved to the last position here; confirm against the real source file.
-- Column types use Hive-native STRING (consistent with p_day) rather than the
-- catch-all varchar(255).
create table if not exists ods_zl_text_cem_bad_family_day_temp
(
msisdn                       string,  -- subscriber number (masked in the sample)
cellid                       string,
kpi_volte_rtp_pkt_loss_ratio string,  -- e.g. '59.28%'
kpi_volte_rtp_jitter         string,
kpi_volte_rtp_max_delay      string,
kpi_volte_time               string,
score_overall                string,
potential_cause              string,  -- ':'-separated category list
cellid_all                   string,  -- ':'-separated cell code list
col10                        string,  -- ':'-separated id list (10th field)
p_day                        string   -- day key, last field of each row
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
stored as textfile;
3.上传文件到hdfs上面
# Upload the local txt file into the HDFS staging directory /test/
hadoop fs -put /home/zhanglong/cem_bad_family_day.txt /test/
4.导入到ods_zl_text_cem_bad_family_day_temp临时表里面
-- LOAD DATA INPATH *moves* (does not copy) the HDFS file into the table's
-- warehouse directory; re-running this step requires re-uploading the file.
LOAD DATA INPATH '/test/cem_bad_family_day.txt' INTO TABLE ods_zl_text_cem_bad_family_day_temp;
5.创建一个分区表(以p_day分区)
-- Target table, partitioned by day so queries can prune on p_day.
-- Columns use Hive-native STRING (consistent with the p_day partition column)
-- instead of the catch-all varchar(255) of the original DDL; IF NOT EXISTS
-- makes the script re-runnable.
create table if not exists ods_zl_test_cem_bad_family_day
(
msisdn                       string,
cellid                       string,
kpi_volte_rtp_pkt_loss_ratio string,
kpi_volte_rtp_jitter         string,
kpi_volte_rtp_max_delay      string,
kpi_volte_time               string,
score_overall                string,
potential_cause              string,
cellid_all                   string,
col10                        string
)
PARTITIONED BY (p_day string);  -- partition column lives outside the column list
6.导入到分区表里面
-- Dynamic-partition insert: p_day gets no static value in the PARTITION clause,
-- so Hive must be switched out of strict mode first — otherwise the statement
-- fails with "Dynamic partition strict mode requires at least one static
-- partition column".
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;

-- The partition column (p_day) must be the LAST expression in the select list;
-- all other columns are listed in target-table order.
insert overwrite table
ods_zl_test_cem_bad_family_day partition(p_day)
select
    msisdn,
    cellid,
    kpi_volte_rtp_pkt_loss_ratio,
    kpi_volte_rtp_jitter,
    kpi_volte_rtp_max_delay,
    kpi_volte_time,
    score_overall,
    potential_cause,
    cellid_all,
    col10,
    p_day
from ods_zl_text_cem_bad_family_day_temp;
**注意:p_day一定要放在最后面才可以完整的进行分区。**
- 查看分区
-- List the p_day partitions created by the dynamic-partition insert above
show partitions ods_zl_test_cem_bad_family_day;
- 查询数据
-- p_day is declared STRING, so the literal must be quoted: comparing against
-- the bare number 20200530 forces an implicit cast of every partition value
-- and can defeat partition pruning. (select * is acceptable for this ad-hoc
-- verification query.)
select * from ods_zl_test_cem_bad_family_day where p_day = '20200530';