130.Impala基准测试

  • 已使用hive-testbench生成好Hive的基准测试数据
  • 创建好了Hive的外部表
  • Hue查看
  • SQL脚本alltables_parquet.sql
    • 用于生成parquet格式的表
drop database if exists ${VAR:DB} cascade;
create database ${VAR:DB};
use ${VAR:DB};
set parquet_file_size=512M;
set COMPRESSION_CODEC=snappy;
drop table if exists call_center;
create table ${VAR:DB}.call_center
stored as parquet
as select * from ${VAR:HIVE_DB}.call_center;
drop table if exists catalog_page;
create table ${VAR:DB}.catalog_page
stored as parquet
as select * from ${VAR:HIVE_DB}.catalog_page;
drop table if exists catalog_returns;
create table ${VAR:DB}.catalog_returns
stored as parquet
as select * from ${VAR:HIVE_DB}.catalog_returns;
drop table if exists catalog_sales;
create table ${VAR:DB}.catalog_sales
stored as parquet
as select * from ${VAR:HIVE_DB}.catalog_sales;
drop table if exists customer_address;
create table ${VAR:DB}.customer_address
stored as parquet
as select * from ${VAR:HIVE_DB}.customer_address;
drop table if exists customer_demographics;
create table ${VAR:DB}.customer_demographics
stored as parquet
as select * from ${VAR:HIVE_DB}.customer_demographics;
drop table if exists customer;
create table ${VAR:DB}.customer
stored as parquet
as select * from ${VAR:HIVE_DB}.customer;
drop table if exists date_dim;
create table ${VAR:DB}.date_dim
stored as parquet
as select * from ${VAR:HIVE_DB}.date_dim;
drop table if exists household_demographics;
create table ${VAR:DB}.household_demographics
stored as parquet
as select * from ${VAR:HIVE_DB}.household_demographics;
drop table if exists income_band;
create table ${VAR:DB}.income_band
stored as parquet
as select * from ${VAR:HIVE_DB}.income_band;
drop table if exists inventory;
create table ${VAR:DB}.inventory
stored as parquet
as select * from ${VAR:HIVE_DB}.inventory;
drop table if exists item;
create table ${VAR:DB}.item
stored as parquet
as select * from ${VAR:HIVE_DB}.item;
drop table if exists promotion;
create table ${VAR:DB}.promotion
stored as parquet
as select * from ${VAR:HIVE_DB}.promotion;
drop table if exists reason;
create table ${VAR:DB}.reason
stored as parquet
as select * from ${VAR:HIVE_DB}.reason;
drop table if exists ship_mode;
create table ${VAR:DB}.ship_mode
stored as parquet
as select * from ${VAR:HIVE_DB}.ship_mode;
drop table if exists store_returns;
create table ${VAR:DB}.store_returns
stored as parquet
as select * from ${VAR:HIVE_DB}.store_returns;
drop table if exists store_sales;
create table ${VAR:DB}.store_sales
stored as parquet
as select * from ${VAR:HIVE_DB}.store_sales;
drop table if exists store;
create table ${VAR:DB}.store
stored as parquet
as select * from ${VAR:HIVE_DB}.store;
drop table if exists time_dim;
create table ${VAR:DB}.time_dim
stored as parquet
as select * from ${VAR:HIVE_DB}.time_dim;
drop table if exists warehouse;
create table ${VAR:DB}.warehouse
stored as parquet
as select * from ${VAR:HIVE_DB}.warehouse;
drop table if exists web_page;
create table ${VAR:DB}.web_page
stored as parquet
as select * from ${VAR:HIVE_DB}.web_page;
drop table if exists web_returns;
create table ${VAR:DB}.web_returns
stored as parquet
as select * from ${VAR:HIVE_DB}.web_returns;
drop table if exists web_sales;
create table ${VAR:DB}.web_sales
stored as parquet
as select * from ${VAR:HIVE_DB}.web_sales;
drop table if exists web_site;
create table ${VAR:DB}.web_site
stored as parquet
as select * from ${VAR:HIVE_DB}.web_site;

  • Impala基准测试数据生成
# 在Impala Daemon节点执行
[root@ip-xxx-xx-xx-xx ~]# impala-shell -i ip-xxx-xx-xx-xx.ap-southeast-1.compute.internal --var=DB=tpcds_parquet_2 --var=HIVE_DB=tpcds_text_2 -f alltables_parquet.sql

  • Impala查看是否创建成功


  • analyze.sql创建
# 用于统计分析Impala的表
use ${VAR:DB};
compute stats call_center ;
compute stats catalog_page ;
compute stats catalog_returns ;
compute stats catalog_sales ;
compute stats customer_address ;
compute stats customer_demographics ;
compute stats customer ;
compute stats date_dim ;
compute stats household_demographics ;
compute stats income_band ;
compute stats inventory ;
compute stats item ;
compute stats promotion ;
compute stats reason ;
compute stats ship_mode ;
compute stats store_returns ;
compute stats store_sales ;
compute stats store ;
compute stats time_dim ;
compute stats warehouse ;
compute stats web_page ;
compute stats web_returns ;
compute stats web_sales ;
compute stats web_site ;

  • 对Impala的表进行统计分析
[root@ip-xxx-xx-xx-xx ddl-tpcds]# impala-shell -i ip-xxx-xx-xx-xx.ap-southeast-1.compute.internal --var=DB=tpcds_parquet_2 -f analyze.sql

  • 批量运行脚本run_all_queries.sh
# 将结果输出到日志文件
[root@ip-xxx-xx-xx-xx impala-tpcds]# vim run_all_queries.sh 
#!/bin/bash

impala_demon=ip-xxx-xx-xx-xx.ap-southeast-1.compute.internal
database_name=tpcds_parquet_2
current_path=`pwd`
queries_dir=${current_path}/queries
rm -rf logs
mkdir logs
for t in `ls ${queries_dir}`
do
    echo "current query will be ${queries_dir}/${t}"
    impala-shell --database=$database_name -i $impala_demon -f ${queries_dir}/${t} &>logs/${t}.log
done
echo "all queries execution are finished, please check logs for the result!"
# 将脚本中impala_daemon和database_name修改为你自己环境的配置即可
  • 查看执行结果及运行时间


大数据视频推荐:
CSDN
人工智能算法竞赛实战
AIops智能运维机器学习算法实战
ELK7 stack开发运维实战
PySpark机器学习从入门到精通
AIOps智能运维实战
大数据语音推荐:
ELK7 stack开发运维
企业级大数据技术应用
大数据机器学习案例之推荐系统
自然语言处理
大数据基础
人工智能:深度学习入门到精通

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值