背景
现在java jdbc大多数批量插入都是使用addBatch()来实现的。
Hive JDBC 的 PreparedStatement 不支持 addBatch(),因此需要把多行数据拼接成单条多值 insert 语句(insert into t values (...),(...),...)来实现批量插入。
步骤
1. pom.xml
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>2.3.7</version>
</dependency>
2. HiveJdbcHelper类
package com.study.util;
import com.study.entity.Student;

import java.lang.reflect.Field;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.List;
/**
 * Helper for bulk-inserting rows into a Hive table over JDBC.
 *
 * <p>Hive's JDBC driver does not support {@code addBatch()}, so this helper builds a
 * single multi-row {@code INSERT INTO t VALUES (...),(...),...} statement instead and
 * binds every field of every row as a positional parameter.
 *
 * <p>Thread-safety: the class holds no mutable state; each call opens and closes its
 * own connection.
 */
public class HiveJdbcHelper {

    // NOTE(review): connection details and credentials should be externalized to
    // configuration instead of being hard-coded in source.
    private static final String JDBC_URL = "jdbc:hive2://10.85.24.51:10000/test_databases";
    private static final String JDBC_USER = "data_check";
    private static final String JDBC_PASSWORD = "data123456";

    // DateTimeFormatter is thread-safe, so it can be cached (SimpleDateFormat is not).
    private static final DateTimeFormatter TIME_FORMAT =
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    // Singleton: private constructor + lazy holder-class idiom.
    private HiveJdbcHelper() {
    }

    private static class JDBCHelperInstance {
        private static final HiveJdbcHelper INSTANCE = new HiveJdbcHelper();
    }

    /** Returns the single shared instance. */
    public static HiveJdbcHelper getInstance() {
        return JDBCHelperInstance.INSTANCE;
    }

    // Register the Hive JDBC driver once, when the class is first loaded.
    static {
        try {
            Class.forName("org.apache.hive.jdbc.HiveDriver");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens a new connection to HiveServer2.
     *
     * @return a fresh connection, or {@code null} if the connection attempt failed
     *         (the failure is logged). Callers must close the returned connection.
     */
    private static Connection getConnect() {
        try {
            return DriverManager.getConnection(JDBC_URL, JDBC_USER, JDBC_PASSWORD);
        } catch (SQLException e) {
            System.out.println("数据库连接失败!");
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Demo entry point: inserts 10,000 generated students into table {@code student}.
     *
     * @param args args
     */
    public static void main(String[] args) {
        List<Student> students = new ArrayList<Student>();
        for (int i = 1; i <= 10000; i++) {
            Student student = new Student();
            student.setId(i + "");
            student.setName("wangwu" + i);
            student.setAge(23 + i + "");
            students.add(student);
        }
        HiveJdbcHelper.getInstance().executeBatch("student", students);
    }

    /**
     * Inserts all rows with one multi-value INSERT statement.
     *
     * <p>Column values are taken from the declared fields of {@link Student} via
     * reflection, in declaration order; the target table's column order must match.
     *
     * @param tableName table name — NOTE(review): concatenated into the SQL text, so it
     *                  must never come from untrusted input
     * @param students  entities to insert; a null or empty list is a no-op
     */
    public void executeBatch(String tableName, List<Student> students) {
        if (students == null || students.isEmpty()) {
            return; // nothing to insert; avoids building an invalid "VALUES" clause
        }
        Field[] fields = Student.class.getDeclaredFields();
        for (Field f : fields) {
            f.setAccessible(true); // once per field, not once per row
        }

        // One "(?,?,...)" group is identical for every row — build it once.
        String rowPlaceholder =
                "(" + String.join(",", Collections.nCopies(fields.length, "?")) + ")";
        String insertSql = String.format("insert into %s values", tableName)
                + String.join(",", Collections.nCopies(students.size(), rowPlaceholder));
        System.out.println("insertSql: " + insertSql);

        Connection conn = getConnect();
        if (conn == null) {
            return; // connection failure already logged by getConnect()
        }
        // try-with-resources closes both statement and connection (original leaked both).
        try (Connection c = conn;
             PreparedStatement pstmt = c.prepareStatement(insertSql)) {
            int index = 1; // JDBC parameters are 1-based and run row-major
            for (Student student : students) {
                for (Field f : fields) {
                    pstmt.setObject(index++, f.get(student));
                }
            }
            System.out.println("beginTime:" + LocalDateTime.now().format(TIME_FORMAT));
            long startTime = System.currentTimeMillis();
            pstmt.execute();
            long endTime = System.currentTimeMillis();
            System.out.println("endTime:" + LocalDateTime.now().format(TIME_FORMAT));
            // Divide by 1000.0 so sub-second runs don't truncate to "0s".
            System.out.println("execute insert time: " + (endTime - startTime) / 1000.0 + "s");
        } catch (SQLException | IllegalAccessException e) {
            e.printStackTrace();
        }
    }
}