表相关
创建表
如果加载数据后发现只有第一列有值,其它列都是null,而且第一列的值是所有列组合起来的,说明建表指定的分隔符和数据文件中的不一致
-- New-device event table, partitioned by date + hash key, stored as gzip-compressed Parquet.
-- (Original had spaces after the opening backticks, which would create columns literally
-- named " u", " p1", ... — removed.)
CREATE TABLE `beidou_al_dd_out_hive_iqiyi_pca_app_evt_new_device` (
  `u`  string COMMENT '设备ID',   -- device ID
  `p1` string COMMENT '平台ID',   -- platform ID
  `v`  string COMMENT '版本号'    -- version number
)
PARTITIONED BY (
  `dt`      string COMMENT '数据日期',    -- data date
  `hashkey` string COMMENT 'hash分布键'   -- hash distribution key
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\t'
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'GZIP');
复杂类型(ARRAY,MAP,struct)
-- Demo table for Hive complex types (ARRAY / MAP / STRUCT) with custom delimiters.
-- (Original line for LINES TERMINATED BY had the comment spliced inside the quotes
-- and no statement terminator — fixed.)
create table test_set (
  id     INT,
  name   STRING,
  hobby  ARRAY<STRING>,
  friend MAP<STRING, STRING>,
  mark   STRUCT<math:INT, english:INT>
)
row format delimited
  fields terminated by ','            -- columns separated by ','
  collection items terminated by '_'  -- elements inside ARRAY/MAP separated by '_'
  map keys terminated by ':'          -- key:value separator inside MAP entries
  lines terminated by '\n';           -- rows separated by '\n'
-- Insert one row, wrapping the three collection values with array(), str_to_map()
-- and named_struct(). (Original literals contained stray spaces, e.g. ' math',
-- which would create map keys / struct field names that never match on lookup.)
INSERT INTO test_set
SELECT
  2,
  'xiaohua',
  array('basketball', 'read'),
  str_to_map('xiaoming:no,xiaohong:no'),  -- default delimiters: ',' between pairs, ':' between key and value
  named_struct('math', 90, 'english', 90);
-- Query the complex-type columns.
select
  id,
  name,
  hobby[0],            -- ARRAY: index with [n], zero-based
  friend['xiaohong'],  -- MAP: look up by key (original key had a stray leading space)
  mark.math            -- STRUCT: access a field with dot notation
from test_set;
查看表schema
-- Show a table's schema; add a partition spec to inspect one partition.
-- (Partition value quoted — string partition values need quotes.)
desc <table_name>;
desc <table_name> partition (dt='xxxx');
查看建表语句
-- Print the full CREATE TABLE DDL, including storage format and table properties.
show create table beidou_al_dd_out_hive_chd_app_profile_device_wide;
修改表属性
-- Common ALTER TABLE templates.
ALTER TABLE name RENAME TO new_name;
ALTER TABLE name ADD COLUMNS (col_spec[, col_spec ...]);
-- NOTE(review): plain Hive has no DROP COLUMN — this form is Impala syntax.
-- In Hive, drop a column by listing the remaining ones with REPLACE COLUMNS.
ALTER TABLE name DROP [COLUMN] column_name;
ALTER TABLE name CHANGE column_name new_name new_type COMMENT '';
ALTER TABLE name REPLACE COLUMNS (col_spec[, col_spec ...]);
删除指定分区
-- Drop one date partition; IF EXISTS avoids an error when it is already gone.
-- ${DT} is substituted by the scheduler at run time.
alter table udw_mid_dd_inc_hive_iqiyi_app_itemview_abtest drop if exists partition (dt='${DT}');
修改分区列类型
-- Change a partition column's type (metadata-only change; existing files are not rewritten).
alter table <table_name> partition column (<partition_col> <new_type>);
删除分区后恢复数据
-- Recover data after dropping partitions: temporarily mark the table EXTERNAL so
-- DROP PARTITION removes only metadata (data files stay on HDFS), drop the
-- partitions, flip the flag back, then rebuild partition metadata from HDFS.
alter table xxx set TBLPROPERTIES('EXTERNAL'='true');
alter table xxx drop partition (dt='2021-01-21');               -- one exact partition
alter table xxx drop partition (dt>'2021-01-21');               -- a range of partitions
alter table xxx drop partition (dt>'2021-01-21', hashkey='-1'); -- range + second key
alter table xxx set TBLPROPERTIES('EXTERNAL'='false');
msck repair table xxx;  -- re-register partitions whose directories still exist on HDFS
HADOOP HDFS相关
查看文件
# List files and get dir/file/byte counts under a table's warehouse directory.
hadoop fs -ls /hive/warehouse/udw.db/udw_dwd_hh_fact_hive_iqiyi_app_itemview
hadoop fs -count /hive/warehouse/udw.db/udw_dwd_hh_fact_hive_iqiyi_app_itemview
-- Inside the hive CLI, replace `hadoop fs` with `dfs`
dfs -ls /hive/warehouse/udw.db/udw_dwd_hh_fact_hive_iqiyi_app_itemview
删除hdfs文件
hadoop fs -rm .. .
查看文件存储空间
# Disk usage: per-partition sizes, then the summarized total for the table.
# (Original used // for trailing comments, which the shell does not treat as a comment.)
hadoop fs -du -h /hive/warehouse/udw.db/udw_dwd_hh_fact_hive_iqiyi_app_itemview     # size of each partition
hadoop fs -du -s -h /hive/warehouse/udw.db/udw_dwd_hh_fact_hive_iqiyi_app_itemview  # total size for all partitions
查看分区
-- Check whether a specific partition exists (run inside the hive CLI).
show partitions udw.udw_dwd_hh_fact_hive_nervi_app_click partition (dt='2021-10-31');
# First 10 partition directories straight from HDFS.
hadoop fs -ls /hive/warehouse/udw.db/beidou_al_dd_out_hive_iqiyi_app_evt_itemview_passport | head -10
# Oldest / newest 10 partitions via the hive CLI.
hive -e "show partitions udw.udw_dwd_hh_fact_hive_nervi_app_click;" | head -10
hive -e "show partitions udw.udw_dwd_hh_fact_hive_nervi_app_click;" | tail -10
distcp数据文件同步
hadoop distcp -D mapreduce.job.queuename = root.bi_sh.day -m 500 -bandwidth 10 hdfs://namenode02-bdyf.qiyi.hadoop/hive/warehouse/udw.db/udw_dwd_dd_fact_hive_nervi_app_play/dt= 2021 -01-* /hive/warehouse/udw.db/udw_dwd_dd_fact_hive_nervi_app_play/
namenode02-bdwg-g3.qiyi.hadoop
msck repair table udw_dwd_dd_fact_hive_nervi_app_play
Hive设置
-- Session settings for the hive CLI. (Original property names were garbled with
-- spaces, e.g. `hive. cli. print . header` — `set` requires the dotted name intact.)
-- Run on the bi_sh.day queue.
set mapreduce.job.queuename=bi_sh.day;
-- Print column headers; display one column per line (vertical output).
set hive.cli.print.header=true;
set hive.cli.print.row.to.vertical=true;
set hive.cli.print.row.to.vertical.num=1;

-- Launch the hive CLI.
hive

-- Show the current database in the prompt and print headers.
set hive.cli.print.current.db=true;
set hive.cli.print.header=true;
-- Name the MapReduce job so it is easy to find in the scheduler UI.
set mapred.job.name=p0@videochannel_yule_201808m_reduce_default;
-- Dynamic-partition inserts: raise the per-node and total partition limits,
-- allow all partition columns to be dynamic (nonstrict), and enable the feature.
set hive.exec.max.dynamic.partitions.pernode=2000;
set hive.exec.max.dynamic.partitions=2000;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.dynamic.partition=true;
UDF
-- Register UDFs from a shared jar for the current session only
-- (temporary functions disappear when the session ends).
add jar viewfs://hadoop-bjzyx-g1/user/longyuan_udw/babel/uploaded_resources/UdwHiveUDF.jar;
create temporary function UidToHashKey as 'com.iqiyi.udw.util.UidToHashKey';
create temporary function StrToCombSequence as 'com.iqiyi.udw.util.StrToCombSequence';