
Impala SQL Usage Notes

2018-09-11 20:40:03 · 虾哔哔


#Change the field delimiter

alter table firstround.person_aaa_crime_criminalperson1 set SERDEPROPERTIES('field.delim'='\001');

#Show the CREATE TABLE statement

show CREATE TABLE firstround.banklog_decrypt;

#Change a column type

ALTER TABLE data_market.last_process_time CHANGE hive_count_num hive_count_num string;

#Rename a table

ALTER TABLE transfer_sjdk.bigdata_blacklist RENAME TO transfer_sjdk.tbl_riskprice_it;

#Add a column

ALTER TABLE transfer_sjdk.bigdata_blacklist_history ADD COLUMNS (is_black string);

#Drop a column

ALTER TABLE transfer_sjdk.bigdata_blacklist_history drop is_black;

#Truncate table data

TRUNCATE transfer_sjdk.tbl_riskprice_it;

#Refresh a table

REFRESH transfer_sjdk.tbl_riskprice_it;

#Invalidate table metadata

INVALIDATE METADATA transfer_sjdk.tbl_riskprice_it;

#Re-shuffle table data (rewrite the table onto itself)

INSERT OVERWRITE data_market.repayment_split_table SELECT * FROM data_market.repayment_split_table;

#Copy table structure and data

create table transfer_zhifu_real.sy_role as select * from transfer_zhifu.sy_role;

#Copy table structure only

create table transfer_zhifu_real.sy_role like transfer_zhifu.sy_role;

#Rename a table

ALTER TABLE dh_0016_bigdata_08 RENAME TO dh_0016_bigdata;

#Change a column type

ALTER TABLE users CHANGE dt dt string;

#Show all functions

show functions;

#Show function usage (Hive)

describe function substr;

#Export query results to a file

impala-shell -q "select * from table limit 10" -B --output_delimiter="\t" -o output.txt

#Collect table statistics, so that Impala can automatically optimize queries based on each table's size, the number of distinct values in each column, and so on

compute stats firstround.person_aaa_crime_escapeperson;
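
Once statistics have been collected, they can be inspected with SHOW TABLE STATS and SHOW COLUMN STATS (same table as above):

-- inspect the statistics gathered by COMPUTE STATS
show table stats firstround.person_aaa_crime_escapeperson;

show column stats firstround.person_aaa_crime_escapeperson;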

#Export and import (Hive)

export table poc.chan_info to '/user/hive/test/chan_info';

import from '/user/hive/test/chan_info';

#Export and import a partition of a partitioned table

export table poc.cust_basic_info partition (etl_dt="2017-12-14") to '/user/hive/test/cust_basic_info14';

import from '/user/hive/test/cust_basic_info14';

#Import under a new table name

import table cust_basic_info from '/user/hive/test/cust_basic_info14';

#Import into a partition of a partitioned table

import table cust_basic_info partition (etl_dt="2017-12-14") from '/user/hive/test/cust_basic_info';

#Specify the import location

import table cust_basic_info from '/user/hive/test/cust_basic_info' location '/user/hive/test/cust_basic_info';

#Import as an external table

import external table cust_basic_info from '/user/hive/test/cust_basic_info';

#Force-drop a database together with its tables

DROP DATABASE zxfinance_alpha1_tuomin CASCADE;

#Data type conversion

SELECT cast(substr(createdate,1,10) as int)/86400 created FROM frontlog
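
A related conversion, in case a readable timestamp is wanted instead of a day number; this assumes the first 10 characters of createdate are a unix epoch in seconds:

SELECT from_unixtime(cast(substr(createdate,1,10) as bigint)) AS created_ts FROM frontlog;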

#Ranking within groups

row_number() over (PARTITION BY t4.extractcashbillid,t1.ze_checktime ORDER BY t4.overduedateb DESC) flag

Besides row_number, there are also rank and dense_rank:

rank() over([partition by col1] order by col2)

dense_rank() over([partition by col1] order by col2)

row_number() over([partition by col1] order by col2)

rank allows ties: rows with equal values receive the same rank, and the ranks following a tie are skipped, e.g. 1,1,3,4,5,5,7.

row_number never assigns ties: even rows with equal values receive distinct, consecutive numbers.

dense_rank also allows ties, but does not skip ranks: the next distinct value gets the immediately following rank, e.g. 1,1,2,3,4,4,5.
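
A minimal sketch of the difference, using a hypothetical table scores(name string, score int) that holds the values 90, 90, 80:

SELECT name, score,
rank() over (ORDER BY score DESC) AS rnk, -- 1, 1, 3
dense_rank() over (ORDER BY score DESC) AS drnk, -- 1, 1, 2
row_number() over (ORDER BY score DESC) AS rn -- 1, 2, 3
FROM scores;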

#Register a UDF

impala-shell -q "create function AES(string) returns string location '/user/hive/udf/aesUDF.jar' symbol='com.netease.hive.udf.AESUDF';"

#Create a partitioned table

CREATE TABLE IF NOT EXISTS asset_auxiliary.dh_0016_bigdata_history(

autoindex INT, vindicatorteamid STRING,

createtime STRING)

PARTITIONED BY (dt string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' STORED AS TEXTFILE;

#Add a partition

alter table asset_auxiliary.dh_0016_bigdata_history add partition (dt='2017-12-12');

#Drop a partition

ALTER TABLE asset_auxiliary.dh_0016_bigdata_history DROP IF EXISTS PARTITION(dt='2017-12-12');

#Load data

LOAD DATA INPATH '/user/hive/warehouse/asset_auxiliary.db/dh_0016_bigdata_history/2017-12-12/000000_0' INTO TABLE asset_auxiliary.dh_0016_bigdata_history PARTITION(dt='2017-12-12');

#Write query results to HDFS (Hive)

insert overwrite directory '/user/hive/warehouse/asset_auxiliary.db/dh_0016_bigdata_history/dt=2017-12-12'

ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'

select * from asset_auxiliary.dh_0016_bigdata_tmp where queryday = '2017-12-12';

#Write query results to a local directory (Hive)

insert overwrite local directory '/data/2017-12-12'

ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'

select * from asset_auxiliary.dh_0016_bigdata_tmp where queryday = '2017-12-12';

#Insert data into a table partition

insert into table asset_auxiliary.dh_0016_bigdata_history PARTITION(dt='2017-12-10') select * from asset_auxiliary.dh_0016_bigdata_tmp where queryday = '2017-12-10';

#Show a table's partitions

show partitions asset_auxiliary.dh_0016_bigdata_history;

#Refresh a single partition

REFRESH asset_auxiliary.dh_0016_bigdata PARTITION(dt='2017-12-12');

#Repair table partitions (Hive)

msck repair table table_name;
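
msck repair is Hive-only; in recent Impala versions the counterpart is RECOVER PARTITIONS:

-- Impala: scan HDFS and register any partition directories missing from the metastore
alter table table_name recover partitions;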

#Create a table in Hive

create table test_label.user_json

(

userid string,

accountid string,

banknumber string,

bankname string,

isreceiveaccount string,

isrepayaccount string,

birthday string,

createtime string

)

comment 'user wide table'

partitioned by(dt string comment 'time partition column')

row format delimited

fields terminated by '\t'

stored as textfile location '/user/hdfs/fico/output/1491964346/user_back_json/';

###Impala query error: Memory limit exceeded

ERROR:

Memory limit exceeded

Query did not have enough memory to get the minimum required buffers in the block manager.

Workaround: lift the memory limits, i.e. the query option mem_limit=-1 and the impalad startup flag default_pool_mem_limit=-1b.
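
A minimal sketch of applying these settings; mem_limit is a per-session query option in impala-shell, while default_pool_mem_limit is an impalad startup flag and takes effect only after a daemon restart:

-- in impala-shell: remove the per-query memory cap for this session
set mem_limit=-1;

-- or give queries a concrete budget instead of removing the cap
set mem_limit=4gb;

-- impalad startup flag (not settable per session): --default_pool_mem_limit=-1b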

Note: https://blog.csdn.net/oosongoo/article/details/78435779
