进入hive
[root@host-192-125-30-10 ~]# source /opt/hadoopclient/bigdata_env
[root@host-192-125-30-10 ~]# kinit -kt /mnt/test_java2_keytab/user.keytab test_java2
[root@host-192-125-30-10 ~]# spark-beeline
显示为:
It's running the file spark-beeline, which calls /opt/hadoopclient/Spark2x/spark/bin/beeline
and connects to the JDBCServer automatically
Connecting to jdbc:hive2://192.123.78.3:24002,192.123.78.4:24002,192.123.78.5:24002;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=sparkthriftserver2x;saslQop=auth-conf;auth=KERBEROS;principal=spark2x/hadoop.hadoop.com@HADOOP.COM;
2020-07-29 15:21:48,927 | WARN | main | Unable to load native-hadoop library for your platform... using builtin-java classes where applicable | org.apache.hadoop.util.NativeCodeLoader.<clinit>(NativeCodeLoader.java:60)
Connected to: Spark SQL (version 2.3.2)
Running with YARN Application = application_1581656932438_3597
Driver: Hive JDBC (version 1.2.1.spark_2.3.2)
Transaction isolation: TRANSACTION_REPEATABLE_READ
Beeline version 1.2.1.spark_2.3.2 by Apache Hive
0: jdbc:hive2://192.123.78.5:22550/default>
将文件导入hive
1.在Linux命令行中将csv上传到hdfs的origin
hadoop fs -put /本地csv文件绝对路径 /warehouse/origin/自己找个文件夹
若无目的文件夹,可在hdfs创建文件夹
hadoop fs -mkdir /warehouse/origin/example
2.在Linux命令行中进入hive
spark-beeline --可进入spark内核的hive命令行,计算快,上千万数据量的sql容易卡崩
beeline --可进入mapreduce内核的hive命令行,计算慢,但绝不会崩
3.将文件数据拉到ods层
3.1.ods建表
-- 在beeline中建表
-- Staging table for the raw CSV import (beeline / MapReduce engine).
-- OpenCSVSerde parses each line; all columns arrive as strings.
CREATE TABLE dwd_center.aaa_sswj (
  sh   STRING,
  sfzh STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
  "separatorChar" = ","
)
STORED AS TEXTFILE;
-- 在spark-beeline中建表 --spark-beeline中必须有location,beeline中可以没有
-- Same staging table, created from spark-beeline: there a LOCATION clause
-- is mandatory (plain beeline may omit it and use the warehouse default).
CREATE TABLE dwd_center.aaa_sswj (
  sh   STRING,
  sfzh STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
  "separatorChar" = ","
)
STORED AS TEXTFILE
LOCATION 'hdfs://hacluster/warehouse/dwd_center/aaa_sswj';
-- separatorChar:分隔符
-- quoteChar:引号符
-- escapeChar:转义符
-- 3.2.拉取origin数据
-- 加载Linux本地数据
load data local inpath '/home/hadoop/load1.txt' into table tb_load1; --本地文件的分隔符、换行符必须和ods层的表设置中完全一致
-- 加载hdfs数据
load data inpath '/hive/test/*' into table tb_load1; --必须加*。否则hdfs的 /hive/test 会被整体移动到ods表目录下。
-- 4.将ods拉到dwd
-- 4.1.dwd建表
-- 在beeline命令行中
-- Recreate the DWD-layer external table for alert-handling records (full load).
-- Being EXTERNAL, dropping it removes only metadata, not the underlying files.
-- Column COMMENT strings are kept in Chinese: they are stored table metadata.
drop table if exists dwd_center.xxx;
create external table IF NOT EXISTS dwd_center.xxx (
id bigint comment '序号',
yjcode string comment '预警编号',
clzt string comment '处理状态',
clztcode string comment '处理状态编码',
clr string comment '处理人',
clrcode string comment '处理人编码',
clsj string comment '处理时间',
clyj string comment '处理意见',
clbz string comment '处理备注'
) comment '预警-处理信息(全量)'
row format delimited fields terminated by '\t'
lines terminated by '\n'
stored as textfile
;
-- 在spark-beeline命令行中类同4.1
-- 4.2.拉取ods数据
insert into dwd_center.xxx select * from ods.csv_table;
-- 常用命令
1、查看表的结构和路径
desc formatted aaa_dw_xx_fs_shu;
2、查看数据库和表结构
use dwd_center_tmp; show tables;