Hive分层
ODS层:一般存储原始数据
DWD层:对ODS层的数据进行简单的处理
DWS层:对DWS层的数据进行进一步处理
ADS层:对DWS层的数据进行聚合统计,一般会导出到关系性数据库中提供后台查询
ODS层
直接对原始数据进行入库存储
ods层建表语句如下:
#! /bin/bash
echo "===prepare to create table test_ods_xxx_event==="
sql1="
use xxx_dataware;
CREATE EXTERNAL TABLE IF NOT EXISTS test_ods_xxx_event (
e_json string
)
PARTITIONED BY (\`dt\` string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY "\t"
LOCATION '/user/hive/warehouse/test_ods/test_ods_xxx_event'
TBLPROPERTIES ("orc.compress"="SNAPPY");"
useSql="use xxx_dataware;"
sql="CREATE EXTERNAL TABLE IF NOT EXISTS test_ods_xxx_event (\`e_json\` string) PARTITIONED BY (\`dt\` string) ROW FORMAT DELIMITED FIELDS TERMINATED BY \"\t\" LOCATION '/user/hive/warehouse/test_ods/test_ods_xxx_event' TBLPROPERTIES (\"orc.compress\"=\"SNAPPY\");"
newsql="\"use xxx_dataware;select * from test_ods_xxx_event;\""
echo ${newsql}
beeline -u jdbc:hive2://xxx.xxx.xx.xxx:10000 -n metastore -p hive -e "${useSql}${sql}"
# beeline -n metastore -p hive -e ${newsql}
# hive -e ${sql}
echo "===finashed create table==="
数据载入语句:
#! /bin/bash
db=xxx_dataware
useSql="use xxx_dataware;"
if [ -n "$1" ] ; then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
hdfs dfs -ls /hdfsSink/xxxEvent/ | grep ${do_date} | awk '{print$8}' > tmp_hdfs_dir
cat tmp_hdfs_dir | while read line
do
# echo "$line"
sql="load data inpath 'hdfs:$line/*' into table test_ods_xxx_event partition(dt='${do_date}');"
# echo "$sql"
beeline -u jdbc:hive2://xxx.xxx.xx.xxx:10000 -n metastore -p hive -e "${useSql}${sql}"
done
DWD层
由于ods层存储的是json格式的数据,所以dwd只对json串进行解析:
dwd层创表语句如下:
#! /bin/bash
echo "===prepare to create table test_dwd_xxx_login_event==="
useSql="use xxx_dataware;"
loginSql="CREATE EXTERNAL TABLE IF NOT EXISTS test_dwd_xxx_login_event (\`server_id\` string,\`create_time\` string,\`log_name\` string,\`account\` string,\`user_id\` string,\`role_id\` string,\`role_level\` string,\`role_name\` string,\`channel_key\` string,\`device_id\` string,\`gender\` string,\`ip\` string) PARTITIONED BY (\`dt\` string) STORED AS parquet LOCATION '/user/hive/warehouse/test_dwd/test_dwd_xxx_login_event' TBLPROPERTIES (\"parquet.compression\"=\"SNAPPY\");"
createUserSql="CREATE EXTERNAL TABLE IF NOT EXISTS test_dwd_xxx_create_user_event (\`server_id\` string,\`create_time\` string,\`log_name\` string,\`account\` string,\`user_id\` string,\`role_id\` string,\`role_level\` string,\`role_name\` string,\`channel_key\` string,\`device_id\` string,\`gender\` string) PARTITIONED BY (\`dt\` string) STORED AS parquet LOCATION '/user/hive/warehouse/test_dwd/test_dwd_xxx_create_user_event' TBLPROPERTIES (\"parquet.compression\"=\"SNAPPY\");"
createRoleSql="CREATE EXTERNAL TABLE IF NOT EXISTS test_dwd_xxx_create_role_event (\`server_id\` string,\`create_time\` string,\`log_name\` string,\`account\` string,\`user_id\` string,\`role_id\` string,\`role_level\` string,\`role_name\` string,\`channel_key\` string,\`device_id\` string,\`gender\` string) PARTITIONED BY (\`dt\` string) STORED AS parquet LOCATION '/user/hive/warehouse/test_dwd/test_dwd_xxx_create_role_event' TBLPROPERTIES (\"parquet.compression\"=\"SNAPPY\");"
levelSql="CREATE EXTERNAL TABLE IF NOT EXISTS test_dwd_xxx_level_event (\`server_id\` string,\`create_time\` string,\`log_name\` string,\`account\` string,\`user_id\` string,\`role_id\` string,\`role_level\` string,\`role_name\` string,\`channel_key\` string,\`device_id\` string,\`gender\` string,\`exp\` string) PARTITIONED BY (\`dt\` string) STORED AS parquet LOCATION '/user/hive/warehouse/test_dwd/test_dwd_xxx_level_event' TBLPROPERTIES (\"parquet.compression\"=\"SNAPPY\");"
costItemSql="CREATE EXTERNAL TABLE IF NOT EXISTS test_dwd_xxx_cost_item_event (\`server_id\` string,\`create_time\` string,\`log_name\` string,\`account\` string,\`user_id\` string,\`role_id\` string,\`role_level\` string,\`role_name\` string,\`channel_key\` string,\`device_id\` string,\`gender\` string,\`db_id\` string,\`item_id\` string,\`cost_num\` string,\`after_remain_num\` string,\`source_type\` string) PARTITIONED BY (\`dt\` string) STORED AS parquet LOCATION '/user/hive/warehouse/test_dwd/test_dwd_xxx_cost_item_event' TBLPROPERTIES (\"parquet.compression\"=\"SNAPPY\");"
getItemSql="CREATE EXTERNAL TABLE IF NOT EXISTS test_dwd_xxx_get_item_event (\`server_id\` string,\`create_time\` string,\`log_name\` string,\`account\` string,\`user_id\` string,\`role_id\` string,\`role_level\` string,\`role_name\` string,\`channel_key\` string,\`device_id\` string,\`gender\` string,\`db_id\` string,\`item_id\` string,\`add_num\` string,\`after_remain_num\` string,\`source_type\` string) PARTITIONED BY (\`dt\` string) STORED AS parquet LOCATION '/user/hive/warehouse/test_dwd/test_dwd_xxx_get_item_event' TBLPROPE