Hive Java Demo

This post shows how to start the Hive Thrift Server in a Linux environment and demonstrates Java programming against Hive, so developers can work with Hive data from Java.

This assumes your Hive installation is already deployed and working.

Add all of the JAR files under the Hive installation directory to your project's classpath.

Start Hive on Linux:

[root@xxx bin]# hive --service hiveserver 50031
Starting Hive Thrift Server

 

Hive connection
package hadoop.demo.hive;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

public class getConnect {
    private static Connection conn = null;
    private static Connection connToMysql = null;

    private getConnect() {
    }

    // Obtain the Hive connection (HiveServer1 JDBC driver)
    public static Connection GetHiveConn() throws SQLException {
        if (conn == null) {
            try {
                Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
            } catch (ClassNotFoundException e) {
                e.printStackTrace();
                System.exit(1);
            }
            // Replace Ip with the host running hiveserver; the port matches the one
            // passed when starting "hive --service hiveserver 50031"
            conn = DriverManager.getConnection(
                    "jdbc:hive://Ip:50031/default", "", "");
        }
        return conn;
    }

    // Obtain the MySQL connection
    public static Connection getMySqlConn() throws SQLException {
        if (connToMysql == null) {
            try {
                Class.forName("com.mysql.jdbc.Driver");
            } catch (ClassNotFoundException e) {
                e.printStackTrace();
                System.exit(1);
            }
            connToMysql = DriverManager.getConnection(
                    "jdbc:mysql://ip:3306/hive", "junjun", "123456");
        }
        return connToMysql;
    }

    public static void closeHive() throws SQLException {
        if (conn != null) {
            conn.close();
        }
    }

    public static void closeMysql() throws SQLException {
        if (connToMysql != null) {
            connToMysql.close();
        }
    }
}
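The MySQL connection above points at a database named hive, which in a typical setup is the Hive metastore. As a minimal sketch of how it could be used (the MetastoreDemo class name is hypothetical, and it assumes the standard metastore schema, where registered tables are listed in a TBLS table with a TBL_NAME column):

package hadoop.demo.hive;

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;

public class MetastoreDemo {
    public static void main(String[] args) throws Exception {
        // Query the metastore database directly (assumes the default schema with a TBLS table)
        Connection conn = getConnect.getMySqlConn();
        Statement stmt = conn.createStatement();
        ResultSet rs = stmt.executeQuery("SELECT TBL_NAME FROM TBLS");
        while (rs.next()) {
            System.out.println("Hive table: " + rs.getString(1));
        }
        rs.close();
        stmt.close();
        getConnect.closeMysql();
    }
}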
Hive utility class
package hadoop.demo.hive;

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * Hive utility class
 * 
 * @author xiaoyun.zhao
 */
public class HiveUtil {
    // Placeholder for table-creation logic
    public static void createTable() {
    }

    // Run an HQL query against Hive and return the result set
    public static ResultSet queryHive(String hql) throws SQLException {
        Connection conn = getConnect.GetHiveConn();
        Statement stmt = conn.createStatement();
        ResultSet rs = stmt.executeQuery(hql);
        return rs;
    }
}
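The createTable() method above is left empty. One possible body, sketched under the assumption that we want the classic pokes demo table (foo INT, bar STRING) from the Hive Getting Started guide, and using executeQuery for the DDL statement as the old HiveServer1 JDBC examples do:

// A possible body for HiveUtil.createTable(); the pokes schema is only an example
public static void createTable() throws SQLException {
    Connection conn = getConnect.GetHiveConn();
    Statement stmt = conn.createStatement();
    stmt.executeQuery("CREATE TABLE pokes (foo INT, bar STRING)");
    stmt.close();
}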
Test program
package hadoop.demo.hive;

import java.sql.ResultSet;

public class HiveMain {

    public static void main(String[] args) throws Exception {

        ResultSet res = HiveUtil.queryHive("select * from pokes");

        while (res.next()) {
            System.out.println("Result: key:" + res.getString(1)
                    + "  -->  value:" + res.getString(2));
        }
    }
}
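The query assumes a pokes table already exists (it is the example table from the Hive Getting Started guide). Because GetHiveConn() hands out a single shared connection that is never closed here, it is worth releasing it once the program is done; one possible addition to the end of main(), using the closeHive() helper defined earlier:

        // After consuming the result set, release the shared Hive connection
        res.close();
        getConnect.closeHive();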