一、Hadoop建表
1.1 创建无分区表
CREATE TABLE wxwy.DIM_CITY
(
UNIQUE_KEY STRING,
CITY_NAME STRING,
COMPANY_ID DOUBLE
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
TBLPROPERTIES ('serialization.null.format'='')
1.2 创建分区表
CREATE TABLE wxwy.F_L_C_SctpAssoc_Q
(
dn STRING,
sctpassoc_uk STRING,
sctpassoc_name STRING,
managedelement_uk STRING,
omc_uk STRING,
omc_name STRING,
vendor_uk STRING,
vendor_name STRING,
sig_sctpdatachunksent DOUBLE,
sig_sctpdatachunkreceived DOUBLE,
sig_sctpdatachunkresent DOUBLE,
sig_sctpcongestionduration DOUBLE,
sig_nbrsctpcongestion DOUBLE,
sig_sctpunavailableduration DOUBLE,
sig_nbrsctpunavailable DOUBLE,
start_time STRING,
end_time STRING,
rmuid STRING
)
PARTITIONED BY
(
partitionday STRING,
partitionhour STRING )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
TBLPROPERTIES ('serialization.null.format'='')
1.3 创建压缩表
drop table if exists LTE_MRE;
CREATE TABLE `LTE_MRE`
(
`province_name` STRING ,
`vendor` STRING ,
`fileFormatVersion` STRING ,
`reportTime` TIMESTAMP ,
`startTime` TIMESTAMP ,
`endTime` TIMESTAMP ,
`enb_id` BIGINT ,
`userLabel` STRING ,
`eventType` STRING ,
`object_mmeCode` STRING ,
`object_mmeGroupId` STRING ,
`object_mmeUeS1apId` STRING ,
`object_timeStamp` TIMESTAMP ,
`object_id` STRING ,
`MR_LteScRSRP` BIGINT ,
`MR_LteNcRSRP` BIGINT ,
`MR_LteScRSRQ` BIGINT ,
`MR_LteNcRSRQ` BIGINT ,
`MR_LteScTadv` BIGINT ,
`MR_LteScPHR` BIGINT ,
`MR_LteScRIP` BIGINT ,
`MR_LteScAOA` BIGINT ,
`MR_LteScPlrULQci1` BIGINT ,
`MR_LteScPlrULQci2` BIGINT ,
`MR_LteScPlrULQci3` BIGINT ,
`MR_LteScPlrULQci4` BIGINT ,
`MR_LteScPlrULQci5` BIGINT ,
`MR_LteScPlrULQci6` BIGINT ,
`MR_LteScPlrULQci7` BIGINT ,
`MR_LteScPlrULQci8` BIGINT ,
`MR_LteScPlrULQci9` BIGINT ,
`MR_LteScPlrDLQci1` BIGINT ,
`MR_LteScPlrDLQci2` BIGINT ,
`MR_LteScPlrDLQci3` BIGINT ,
`MR_LteScPlrDLQci4` BIGINT ,
`MR_LteScPlrDLQci5` BIGINT ,
`MR_LteScPlrDLQci6` BIGINT ,
`MR_LteScPlrDLQci7` BIGINT ,
`MR_LteScPlrDLQci8` BIGINT ,
`MR_LteScPlrDLQci9` BIGINT ,
`MR_LteScSinrUL` BIGINT ,
`MR_LteScRI1` BIGINT ,
`MR_LteScRI2` BIGINT ,
`MR_LteScRI4` BIGINT ,
`MR_LteScRI8` BIGINT ,
`MR_LteScPUSCHPRBNum` BIGINT ,
`MR_LteScPDSCHPRBNum` BIGINT ,
`MR_LteScBSR` BIGINT ,
`MR_LteSceNBRxTxTimeDiff` BIGINT ,
`MR_LteScEarfcn` BIGINT ,
`MR_LteScPci` BIGINT ,
`MR_LteNcEarfcn` BIGINT ,
`MR_LteNcPci` BIGINT ,
`MR_GsmNcellBcch` BIGINT ,
`MR_GsmNcellCarrierRSSI` BIGINT ,
`MR_GsmNcellNcc` BIGINT ,
`MR_GsmNcellBcc` BIGINT ,
`MR_TdsPccpchRSCP` BIGINT ,
`MR_TdsNcellUarfcn` BIGINT ,
`MR_TdsCellParameterId` BIGINT
)
partitioned by (`province` STRING,`partitiondate` STRING,`partitionvendor` STRING)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'field.delim'=',',
'line.delim'='\n',
'serialization.format'=',')
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
注意:
1、创建压缩表时需用`(也就是数字1左边的键)。
2、创建压缩表时登录hive,其他表hive和impala均可。
3、hive上运行命令之后最好用impala刷新库
语句:invalidate metadata;
二、hadoop 新增字段,并指定位置
hadoop 新增字段:
alter table nr_mro_txt add columns (haoa INT COMMENT 'gNB天线水平到达角');
其中nr_mro_txt 是表名,haoa 是字段,INT 是字段类型。
指定字段位置
首先新增字段,此时该字段为最后一列。
alter table nr_mro_hour add columns (rsrp3 double);
移动该字段到rsrp2之后,rsrp4 之前。
语句:
alter table nr_mro_hour change rsrp3 rsrp3 double after rsrp2;
(实践经验:用impala移动字段位置时运行失败,但用hive可以)
三、Hadoop表分区管理
删除表分区:
ALTER TABLE f_5_c_s_nrcellcu_q DROP IF EXISTS PARTITION (partitionday="20240103",partitionhour="11")
新增表分区:
ALTER TABLE f_5_c_s_nrcellcu_q ADD IF NOT EXISTS PARTITION (partitionday="20240103",partitionhour="11")