最近面试被问到如何快速将百万级数据保存到数据库。当时回答不理想,事后总结如下。
主要思路:
1.首先将数据一次读入到内存(如果文件超过系统内存,会有问题。具体解决方案我没尝试);
2.开辟多个线程,每个线程新建一个数据库连接;
3.每个线程批量执行插入操作。必须要设置conn.setAutoCommit(false)。这个很重。
具体实现步骤如下:
1. 获取jdbc连接类
package io;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
/**
* @ClassName: JDBCConn
* @Author: wanggj
* @Date: 2020/5/15 9:15
**/
public class JDBCConn {
private static String DRIVERCLASS = "com.mysql.cj.jdbc.Driver";
private static String URL = "jdbc:mysql://localhost:3306/stu?characterEncoding=utf8&serverTimezone=UTC";
private static String USERNAME = "root"; //数据库用户名
private static String PASSWORD = "root"; //数据库密码
public static Connection getConnection() throws ClassNotFoundException, SQLException {
Class.forName(DRIVERCLASS);
Connection conn = DriverManager.getConnection(URL, USERNAME, PASSWORD);
return conn;
}
}
2.文本数据封装为对象
package io;
import java.io.Serializable;
/**
* @ClassName: UserInfo
* @Author: wanggj
* @Date: 2020/5/15 9:26
**/
public class UserInfo implements Serializable {
private String name; //名字
private int gender; //性别
private String address; //大区
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public int getGender() {
return gender;
}
public void setGender(int gender) {
this.gender = gender;
}
public String getAddress() {
return address;
}
public void setAddress(String address) {
this.address = address;
}
}
3.读取文件到数据库
package io;
/**
* @ClassName: Tinput
* @Author: wanggj
* @Date: 2020/5/15 9:28
**/
import java.io.*;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.IntStream;
public class MultiThreadIO {
public void importData(String filePath) {
long start = System.currentTimeMillis();
int i = 0;
List<UserInfo> LS = new ArrayList<>();
try (// 当逐行读写大于2G的文本文件时推荐使用以下代码
BufferedInputStream bis = new BufferedInputStream(new FileInputStream(new File(filePath)));
BufferedReader in = new BufferedReader(new InputStreamReader(bis, "UTF-8"), 10 * 1024 * 1024)) {
while (in.ready()) {
String line = in.readLine();
String[] arr = line.split(","); //将读取的每一行以 , 号分割成数组
if (arr.length < 3) continue; //arr数组长度大于3才是一条完整的数据
UserInfo userInfo = new UserInfo();
userInfo.setName(arr[0]); //名称
userInfo.setAddress(arr[1]); //地址
userInfo.setGender(Integer.parseInt(arr[2])); //性别
LS.add(userInfo); //把从文件中读取的数据存到内存里
i++; //记录读取的条数
}
System.out.println("\n总共读取Txt文件中" + i + "条数据");
try {
List<Thread> threads = new ArrayList<>();
IntStream.rangeClosed(0, 28).forEach(t -> {
Thread thread = new Thread() {
@Override
public void run() {
try {
final Connection con = JDBCConn.getConnection();
System.out.println("sql连接成功");
String sql = "INSERT INTO user_info(`name` , `address`, `gender`) VALUES (?, ?, ?)";
con.setAutoCommit(false); //(重要)必须设置为批量提交,为true的话,插入一条commit1次,非常耗时。
PreparedStatement ptatm = con.prepareStatement(sql);
LS.stream().skip(t * 100000).limit(100000).forEach(userInfo -> {
try {
ptatm.setString(1, userInfo.getName()); //名称
ptatm.setString(2, userInfo.getAddress()); //大区
ptatm.setInt(3, userInfo.getGender()); //登录时间
ptatm.addBatch(); //批量记录到容器里
} catch (SQLException e) {
e.printStackTrace();
}
});
System.out.println(Thread.currentThread().getName() + "执行批量插入数据 开始.");
ptatm.executeBatch();
con.commit();
System.out.println(Thread.currentThread().getName() + "执行批量插入数据 结束.");
con.close();
ptatm.close();
} catch (SQLException e) {
e.printStackTrace();
} catch (ClassNotFoundException e) {
e.printStackTrace();
}
}
};
thread.start();
threads.add(thread);
});
//所有子线程执行完毕执行主线程
threads.forEach(thread -> {
try {
thread.join();
} catch (InterruptedException e) {
e.printStackTrace();
}
});
System.out.println("任务执行完成。执行用时:" + (System.currentTimeMillis() - start));
} catch (Exception e) {
e.printStackTrace();
}
} catch (IOException ex) {
ex.printStackTrace();
}
}
}
测试客户端
package io;
/**
* @ClassName: IOMainTest
* @Author: wanggj
* @Date: 2020/5/15 9:07
**/
public class IOMainTest {
public static void main(String[] args) {
MultiThreadIO threadIO = new MultiThreadIO();
String filePath = "C:\\Users\\wanggj\\user.txt";
threadIO.importData(filePath);
}
}
注意:线上环境时,需要使用线程池替换自己手动创建线程。另外本人能力有限,如果有误,还请指正。