2. Custom UDFs (user-defined functions)
1. Create a table
create table t_student_udf(id int,name string,tel string)
row format delimited
fields terminated by ','
stored as textfile;
2. Load the data
load data local inpath '/home/hdfs/data_getarea' into table t_student_udf;
File contents:
1,张三,13834112233
2,李四,13994200987
3,王五,13302019922
4,jack,13211223344
3. Write a custom UDF that returns the region for a phone number
3.1 Add the dependency to the pom file
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-exec</artifactId>
    <version>3.1.2</version>
</dependency>
3.2 Write a Java class that extends UDF and implement the lookup logic in an evaluate method
package com.briup.udf;

import org.apache.hadoop.hive.ql.exec.UDF;

import java.util.HashMap;
import java.util.Map;

public class MyUDF extends UDF {
    // mapping from phone-number prefix to region name
    static Map<String,String> index = new HashMap<>();
    static {
        index.put("138","北京");
        index.put("139","苏州");
        index.put("133","南昌");
    }

    // Hive calls evaluate() once per row; the argument is the tel column
    public String evaluate(String phone){
        String phoneHead = phone.substring(0,3);
        String s = index.get(phoneHead);
        if (s == null) {
            return "未知";
        }
        return s;
    }
}
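The lookup logic can be sanity-checked with plain Java before packaging; MyUDFTest below is just a throwaway class for this check, not something Hive needs:
package com.briup.udf;

// quick local check of the UDF logic (plain Java, not a Hive test)
public class MyUDFTest {
    public static void main(String[] args) {
        MyUDF udf = new MyUDF();
        System.out.println(udf.evaluate("13834112233")); // expected: 北京
        System.out.println(udf.evaluate("13211223344")); // expected: 未知
    }
}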
3.3 Package the class into a jar and upload it to the node where Hive runs, for example:
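A minimal sketch of the packaging and upload; the jar name, user and host below are placeholders, adjust them to your environment:
mvn clean package
scp target/udf.jar hdfs@hive-node:/home/hdfs/udf.jar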
3.4 Create a function in Hive and associate it with the custom class in the jar
add jar /home/hdfs/udf.jar;
create temporary function getArea as 'com.briup.udf.MyUDF';
show functions;
select id,name,tel,getArea(tel) from t_student_udf;
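A temporary function only lives for the current session. If the jar is first uploaded to HDFS, a permanent function can be registered instead; the HDFS path below is just an example:
create function getArea as 'com.briup.udf.MyUDF' using jar 'hdfs:///user/hdfs/udf.jar';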
6. Hive's Java client
Prerequisites
1.hdfs-site.xml
<property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
</property>
2.core-site.xml
<property>
    <name>hadoop.proxyuser.briup.hosts</name>
    <value>*</value>
</property>
<property>
    <name>hadoop.proxyuser.briup.groups</name>
    <value>*</value>
</property>
In the two property names above, the third segment (here, briup) is the username on the machine where Hive runs; replace it with your own user.
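If HDFS and YARN are already running, the proxy-user settings can usually be reloaded without restarting the whole cluster (a sketch; restarting the NameNode/ResourceManager also works):
hdfs dfsadmin -refreshSuperUserGroupsConfiguration
yarn rmadmin -refreshSuperUserGroupsConfiguration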
3. Start the thrift service
1 Start the Hive service and keep it running
2 hiveserver2
hive --service hiveserver2 &
It listens on port 10000 by default (a quick way to check the port is shown after this list).
3 In the code, set the DriverClassName to: org.apache.hive.jdbc.HiveDriver
and the url to: jdbc:hive2://127.0.0.1:10000/bd1803
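Before writing any client code it helps to confirm that HiveServer2 is actually listening on that port; a quick check from the shell (netstat may need root to show the process name):
netstat -nltp | grep 10000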
4. Test whether the thrift service started successfully
beeline
!connect jdbc:hive2://127.0.0.1:10000
Enter the username (the same proxy user configured in core-site.xml)
Enter the password (for that same proxy user)
!quit to exit
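The connection can also be opened in a single beeline command; the username and password here are just the example proxy user from core-site.xml above:
beeline -u jdbc:hive2://127.0.0.1:10000 -n briup -p briup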
5. Write the Java program
1. Add the dependencies to the pom file
<dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>4.12</version>
</dependency>
<dependency>
    <groupId>log4j</groupId>
    <artifactId>log4j</artifactId>
    <version>1.2.17</version>
</dependency>
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-jdbc</artifactId>
    <version>3.1.2</version>
</dependency>
2. Create the corresponding package and class
package com.briup.base;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.sql.*;

public class HiveOper {
    private static String driverName = "org.apache.hive.jdbc.HiveDriver";
    private Connection conn;

    @Before
    public void init() {
        Logger.getLogger("org").setLevel(Level.WARN);
        try {
            Class.forName(driverName);
            // connect as the proxy user configured in core-site.xml
            conn = DriverManager.getConnection("jdbc:hive2://127.0.0.1:10000", "briup", "briup");
            System.out.println(conn);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    @Test
    public void createDB() throws Exception {
        Statement stmt = conn.createStatement();
        stmt.execute("create database if not exists conn_java");
        stmt.close();
        System.out.println("Database created successfully");
    }

    @Test
    public void dropDB() throws Exception {
        Statement stmt = conn.createStatement();
        stmt.execute("drop database if exists conn_java");
        stmt.close();
        System.out.println("Database dropped successfully");
    }
    // create a managed (internal) table
    @Test
    public void createManagerTable() throws Exception {
        Statement stmt = conn.createStatement();
        stmt.execute("create table if not exists conn_java.t_stu(id int,name varchar(20),score float) row format delimited fields terminated by ',' stored as textfile");
        stmt.close();
        System.out.println("Table created successfully");
    }

    @Test
    public void dropTable() throws Exception {
        Statement stmt = conn.createStatement();
        stmt.execute("drop table if exists conn_java.t_stu");
        stmt.close();
        System.out.println("Table dropped successfully");
    }
    @Test
    public void insertDataToTable() throws Exception {
        String sql = "insert into conn_java.t_stu values(?,?,?)";
        PreparedStatement pstmt = conn.prepareStatement(sql);
        pstmt.setInt(1, 12019001);
        pstmt.setString(2, "张三");
        pstmt.setFloat(3, 68.5f);
        pstmt.execute();
        pstmt.close();
        System.out.println("Data inserted successfully");
    }

    @Test
    public void loadHDFSDataToTable() throws Exception {
        // moves the file from its HDFS location into the table's directory
        String sql = "load data inpath '/user/extr/phone_data' into table conn_java.t_stu";
        PreparedStatement pstmt = conn.prepareStatement(sql);
        pstmt.execute();
        pstmt.close();
        System.out.println("Data loaded successfully");
    }

    @Test
    public void loadLocalDataToTable() throws Exception {
        // 'local' reads from the filesystem of the node running HiveServer2
        String sql = "load data local inpath '/home/briup/phone_data' into table conn_java.t_stu";
        PreparedStatement pstmt = conn.prepareStatement(sql);
        pstmt.execute();
        pstmt.close();
        System.out.println("Data loaded successfully");
    }
    // create an external table
    @Test
    public void createExternalTable() throws Exception {
        Statement stmt = conn.createStatement();
        stmt.execute("create external table if not exists conn_java.t_phone_extr(id int,name varchar(20),score float) row format delimited fields terminated by ',' stored as sequencefile location '/user/extr/'");
        stmt.close();
        System.out.println("External table created successfully");
    }

    @Test
    public void dropExternalTable() throws Exception {
        Statement stmt = conn.createStatement();
        stmt.execute("drop table if exists conn_java.t_phone_extr");
        stmt.close();
        System.out.println("External table dropped successfully");
    }

    @Test
    public void insertBySelect() throws Exception {
        // use "insert overwrite table ..." instead of "insert into" to replace existing data rather than append
        String sql = "insert into conn_java.t_phone_extr select * from conn_java.t_stu";
        PreparedStatement pstmt = conn.prepareStatement(sql);
        pstmt.execute();
        pstmt.close();
        System.out.println("Insert-by-select completed successfully");
    }
    @Test
    public void selectData() throws Exception {
        String sql = "select id,name,score from conn_java.t_stu";
        PreparedStatement pstmt = conn.prepareStatement(sql);
        ResultSet set = pstmt.executeQuery();
        while (set.next()) {
            int id = set.getInt(1);
            String name = set.getString(2);
            float score = set.getFloat(3);
            System.out.println(id + ":" + name + ":" + score);
        }
        set.close();
        pstmt.close();
        System.out.println("Query completed successfully");
    }
    @After
    public void close() throws Exception {
        if (conn != null) {
            conn.close();
        }
    }
}
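Assuming a standard Maven project with the Surefire plugin, a single test method can be run from the command line, for example:
mvn test -Dtest=HiveOper#createDB
Run createDB before the table tests, since they all reference the conn_java database.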
7. Interacting with HBase
1. Create the table in HBase
create 'person',{NAME => 'f1',VERSIONS => 1},{NAME => 'f2',VERSIONS => 1},{NAME => 'f3',VERSIONS => 1}
put 'person','1001','f1:name','jack'
put 'person','1001','f2:age','18'
put 'person','1002','f1:name','jack'
put 'person','1003','f3:position','ceo'
2. In the Hive CLI, run the following
SET hbase.zookeeper.quorum=localhost:2181;
SET zookeeper.znode.parent=/hbase;
ADD jar /home/briup/apache-hive-2.3.3-bin/lib/hive-hbase-handler-2.3.3.jar;
CREATE EXTERNAL TABLE person (
rowkey string,
f1 map<STRING,STRING>,
f2 map<STRING,STRING>,
f3 map<STRING,STRING>
) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f1:,f2:,f3:")
TBLPROPERTIES ("hbase.table.name" = "person");
•hbase.zookeeper.quorum: the ZooKeeper ensemble used by HBase. The default port is 2181 and may be omitted; if specified, the format is zkNode1:2222,zkNode2:2222,zkNode3:2222
•zookeeper.znode.parent: the root znode that HBase uses in ZooKeeper
•hbase.columns.mapping: the column mapping between the Hive table and the HBase table; the first Hive column maps to :key (the rowkey), the second to column family f1, the third to column family f2, and the fourth to column family f3
•hbase.table.name: the name of the table in HBase
INSERT INTO TABLE person
SELECT 'row1' AS rowkey,
map('c3','name3') AS f1,
map('c3','age3') AS f2,
map('c4','job3') AS f3 ;
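Rows written from either side are then visible on the other. Map columns are read with the ['key'] syntax in Hive, and the HBase shell can be used to cross-check; for example:
SELECT rowkey, f1['name'], f2['age'], f3['position'] FROM person;
and, in the HBase shell:
scan 'person'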