# Service startup cheat sheet -- run on the cluster node, in this order.
# (Original lines carried shorthand labels like "hadoop"/"zkS" that made
# them unrunnable; labels are now comments.)
start-all.sh                    # Hadoop: HDFS + YARN daemons
zkServer.sh start               # ZooKeeper
hive --service hiveserver2 &    # HiveServer2 (background)
hive --service metastore &      # Hive metastore (background)
start-hbase.sh                  # HBase daemons
hbase shell                     # interactive HBase shell (separate step)
zeppelin-daemon.sh start        # Zeppelin notebook (note: "zeppelin", not "zepplin")
%sh
# Stage the exam CSV into HDFS and sanity-check the row count.
hdfs dfs -rm -r -f /app                                       # -f: no error if /app is absent
hdfs dfs -mkdir -p /app/data/exam
hdfs dfs -put /opt/returned_goods_log_7-9.csv /app/data/exam
# Count lines (includes the CSV header). Path fixed: the original read
# /data../*.csv, which does not match the upload target above.
hdfs dfs -cat /app/data/exam/*.csv | wc -l
# HBase: create the target namespace and table.
# The Hive mapping below requires column family 'result'
# (result:service, result:praise); the original family name
# 'statisitcs' (typo) was never referenced anywhere.
create_namespace 'exam'
create 'exam:sales', 'result'
-- Hive external table over the raw CSV staged in HDFS.
-- Fixes vs. original: a table may have only ONE row-format clause, so the
-- redundant ROW FORMAT DELIMITED is dropped in favor of OpenCSVSerde
-- (which the serdeproperties belong to); `---` is not a valid Hive comment;
-- statements are terminated with semicolons.
CREATE DATABASE IF NOT EXISTS exam;

CREATE EXTERNAL TABLE IF NOT EXISTS exam.sale_service (
    -- TODO(review): declare one column per field of returned_goods_log_7-9.csv;
    -- the original column list was empty. OpenCSVSerde reads every column as
    -- STRING regardless of the declared type.
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
    'separatorChar' = ',',
    'quoteChar'     = '\"',
    'escapeChar'    = '\\'
)
LOCATION '/app/data/exam'
TBLPROPERTIES ('skip.header.line.count' = '1');  -- skip the CSV header row
-- Hive external table mapped onto the HBase table exam:sales.
-- Hive requires exactly one declared column per entry in
-- hbase.columns.mapping; the original declared only (key, service)
-- against a three-entry mapping, so `praise` is added.
CREATE EXTERNAL TABLE IF NOT EXISTS exam.hbase_sale_service (
    key     STRING,   -- HBase row key (:key)
    service INT,      -- result:service
    praise  INT       -- result:praise
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    'hbase.columns.mapping' = ':key,result:service,result:praise'
)
TBLPROPERTIES ('hbase.table.name' = 'exam:sales');
-- Load the per-day top country (by daily confirmed increase) into the
-- HBase-backed Hive table.
-- NOTE(review): this reads ex_exam_record / ex_exam_covid19_record, which do
-- not match the sale tables defined above -- confirm the intended source.
-- Fixes vs. original: `rank` is a reserved word in Hive 2+ (renamed to rn);
-- `key` is backquoted; the useless global ORDER BY inside the INSERT source
-- (it forces a single reducer and has no effect on the insert) is dropped.
WITH ranked AS (
    -- rank countries within each recordDate by daily confirmed increase
    SELECT
        continent,
        countryName,
        recordDate,
        confirmedIncr,
        ROW_NUMBER() OVER (
            PARTITION BY recordDate
            ORDER BY CAST(confirmedIncr AS INT) DESC
        ) AS rn
    FROM ex_exam_record
),
top_per_day AS (
    SELECT continent, countryName, recordDate, confirmedIncr
    FROM ranked
    WHERE rn = 1
)
INSERT INTO TABLE ex_exam_covid19_record
SELECT
    CONCAT(continent, recordDate) AS `key`,              -- HBase row key
    countryName                   AS maxIncreaseCountry,
    confirmedIncr                 AS maxIncreaseCount
FROM top_per_day;
# Verify the load: show the first 10 rows of the HBase table.
# (The original bare label line was not valid hbase shell input.)
scan 'exam:sales', { LIMIT => 10 }