beeline -u jdbc:hive2://node-1:10000 -n zpark
- 初始化数据库
drop database if exists epidemic_ods cascade;
drop database if exists epidemic_da cascade;
create database if not exists epidemic_ods;
create database if not exists epidemic_da;
- 创建表
use epidemic_ods;
create external table epidemic_ods.epidemic_ods_gy(
years string,
name string,
monthly_growth int,
cumulative int
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
- 加载数据
load data inpath '/clean/red_data/gy/part-r-00000' into table epidemic_ods.epidemic_ods_gy;
- 创建数据库DW
参数设置:
--1.设置动态分区
set hive.exec.dynamic.partition=true;
--2.设置动态分区最大的分区数(默认100)
set hive.exec.max.dynamic.partitions=50000;
set hive.exec.max.dynamic.partitions.pernode=10000;
--3.设置非严格模式
set hive.exec.dynamic.partition.mode=nonstrict;
drop database if exists epidemic_dw cascade;
create database if not exists epidemic_dw;
- 创建表 DW层
create table epidemic_dw.epidemic_dw_gy(
years string,
year string,
name string,
monthlygrowth int,
cumulative int
)
partitioned by (partition_date string comment '分区日期')
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n';
- 加载数据 DW
use epidemic_dw;
insert overwrite table epidemic_dw.epidemic_dw_gy partition(partition_date)
select
years,
name,
monthly_growth,
cumulative,
substring(years,0,4) year,
date_format(current_timestamp(), 'yyyy-MM-dd') as partition_date
from epidemic_ods.epidemic_ods_gy;
测试数据
select * from epidemic_dw_gy limit 10;
创建视图层
create table epidemic_dw.pidemic_view as
select * from epidemic_dw_gy;
7、mysql宽表
drop database if exists residence;
create database if not exists residence CHARACTER SET utf8 COLLATE utf8_general_ci;
create table residence.residence_dw(
years varchar(255),
year varchar(255),
name varchar(255),
monthlygrowth int,
cumulative int,
partition_date varchar(255)
)ENGINE=InnoDB DEFAULT CHARSET=utf8;
8*脚本编写 导出数据到mysql
sqoop export \
--connect "jdbc:mysql://node-1:3306/epidemic?useUnicode=true&characterEncoding=utf-8" \
--username root --password root \
--table epidemic_dw \
--export-dir /user/hive/warehouse/epidemic_dw.db/pidemic_view \
--input-fields-terminated-by '\001' \
--input-null-string '\\N' \
--input-null-non-string '\\N'