本文测试并对比三种向 MySQL 数据库插入大量数据(10 万条以上)的方式:
- 普通方式
- foreach批量插入
- ExecutorType.BATCH插入
1 user实体类
/**
 * User entity mapped to the {@code user} table.
 *
 * The original listing used Lombok's {@code @Setter}/{@code @Getter} without any
 * Lombok import, so it could not compile as shown; explicit accessors keep the
 * exact same public API without the hidden dependency.
 */
public class User {
    // Primary key — presumably auto-generated by the database; confirm schema.
    private Long id;
    private String phone;
    private String name;

    public Long getId() { return id; }
    public void setId(Long id) { this.id = id; }
    public String getPhone() { return phone; }
    public void setPhone(String phone) { this.phone = phone; }
    public String getName() { return name; }
    public void setName(String name) { this.name = name; }
}
2 IUserService
public interface IUserService {
void insertUsers();
void insertUser();
void batchInsert();
void insertUsersBatch();
}
3 userService类
package com.bear.dbinserttest;
import org.apache.ibatis.session.ExecutorType;
import org.apache.ibatis.session.SqlSession;
import org.mybatis.spring.SqlSessionTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.util.ArrayList;
import java.util.List;
/**
 * Benchmarks three ways of bulk-inserting rows into MySQL: row-by-row,
 * multi-row &lt;foreach&gt;, and ExecutorType.BATCH.
 *
 * @author: zhoushaoxiong
 * @date: 2023/6/8 11:08
 */
@Service
public class UserService implements IUserService {

    // Number of rows each benchmark inserts.
    // NOTE(review): the test-class comments cite timings for 100,000 rows,
    // while this constant is 1,000,000 — confirm which run produced them.
    private static int count = 1000000;

    @Resource
    private IUserDao userDao;

    @Autowired
    private SqlSessionTemplate sqlSessionTemplate;

    /**
     * Baseline: one INSERT statement, and one database round trip, per row.
     * (The original also accumulated every User into an unused list,
     * wasting memory for 1,000,000 objects — removed.)
     */
    @Override
    public void insertUser() {
        long start = System.currentTimeMillis();
        for (int i = 0; i < count; i++) {
            User user = new User();
            user.setName("name" + i);
            user.setPhone("phonephone" + i);
            userDao.insertUser(user);
        }
        long usedTime = System.currentTimeMillis() - start;
        System.out.println("插入" + count + "条数据耗时::" + usedTime);
    }

    /**
     * Multi-row insert: accumulates rows and flushes them 20,000 at a time
     * through a single multi-row INSERT built with &lt;foreach&gt;.
     */
    @Override
    public void insertUsers() {
        List<User> userList = new ArrayList<>();
        // MySQL limits statement size (max_allowed_packet, ~4M by default),
        // so all rows cannot go into one <foreach> statement.
        int batch = 20000;
        long start = System.currentTimeMillis();
        for (int i = 0; i < count; i++) {
            User user = new User();
            user.setName("name" + i);
            user.setPhone("phonephone" + i);
            userList.add(user);
            if (userList.size() >= batch) {
                userDao.insertUsers(userList);
                userList.clear();
            }
        }
        if (!userList.isEmpty()) { // flush the final partial batch
            userDao.insertUsers(userList);
        }
        long usedTime = System.currentTimeMillis() - start;
        System.out.println("插入" + count + "条数据耗时::" + usedTime);
    }

    /**
     * Multi-statement insert: one INSERT per row joined with ";" (the
     * batchInsert mapper). Requires allowMultiQueries=true on the JDBC URL.
     * This method was declared on IUserService but missing here, which made
     * the class fail to compile — now implemented.
     */
    @Override
    public void batchInsert() {
        List<User> userList = new ArrayList<>();
        int batch = 1000;
        long start = System.currentTimeMillis();
        for (int i = 0; i < count; i++) {
            User user = new User();
            user.setName("name" + i);
            user.setPhone("phonephone" + i);
            userList.add(user);
            if (userList.size() >= batch) {
                userDao.batchInsert(userList);
                userList.clear();
            }
        }
        if (!userList.isEmpty()) {
            userDao.batchInsert(userList);
        }
        long usedTime = System.currentTimeMillis() - start;
        System.out.println("插入" + count + "条数据耗时::" + usedTime);
    }

    /**
     * ExecutorType.BATCH: reuses one prepared statement and ships parameter
     * sets to the server in batches, committing every 1,000 rows.
     */
    @Override
    public void insertUsersBatch() {
        // Open a BATCH-mode session manually; autoCommit=false so we control commits.
        SqlSession sqlSession = sqlSessionTemplate.getSqlSessionFactory().openSession(ExecutorType.BATCH, false);
        try {
            IUserDao userDao2 = sqlSession.getMapper(IUserDao.class);
            long start = System.currentTimeMillis();
            int batch = 1000;
            for (int i = 0; i < count; i++) {
                User user = new User();
                user.setName("name" + i);
                user.setPhone("phonephone" + i);
                userDao2.insertUser(user);
                if (i != 0 && i % batch == 0) { // commit every 1000 rows
                    sqlSession.commit();
                }
            }
            sqlSession.commit(); // flush and commit the trailing partial batch
            long usedTime = System.currentTimeMillis() - start;
            System.out.println("插入" + count + "条数据耗时::" + usedTime);
        } finally {
            sqlSession.close(); // the session was leaked in the original
        }
    }
}
4 IUserDao
package com.bear.dbinserttest;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import org.mybatis.spring.annotation.MapperScan;
import java.util.List;
/**
 * MyBatis mapper for the user table (bound to UserMapper.xml by namespace).
 *
 * @author: zhoushaoxiong
 * @date: 2023/6/8 11:15
 */
public interface IUserDao {
    // Multi-row insert via <foreach>.
    // NOTE(review): a single List parameter declared without @Param is exposed to
    // the XML under MyBatis's built-in names "list"/"collection", not the Java
    // parameter name — verify the mapper's collection attribute matches.
    void insertUsers(List<User> userList);
    // Single-row insert (also reused by the ExecutorType.BATCH benchmark).
    void insertUser(User user);
    // One INSERT statement per element, joined with ";"; requires
    // allowMultiQueries=true on the JDBC URL.
    void batchInsert(@Param("list") List<User> list);
    // Single-row insert statement; declared but not exercised by the tests shown here.
    void insertUsersBatch(List<User> users);
}
5 UserMapper
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd" >
<mapper namespace="com.bear.dbinserttest.IUserDao" >

    <!-- Plain single-row insert (baseline, also reused by the ExecutorType.BATCH flow). -->
    <insert id="insertUser" parameterType="com.bear.dbinserttest.User">
        insert into user(name, phone) values (#{name}, #{phone})
    </insert>

    <!-- Multi-row insert built with foreach.
         A single List parameter without @Param is exposed as "list"/"collection",
         so collection="userList" (as originally written) would fail to bind. -->
    <insert id="insertUsers" parameterType="java.util.List">
        insert into user(name, phone) values
        <foreach collection="list" item="user" separator=",">
            (#{user.name}, #{user.phone})
        </foreach>
    </insert>

    <!-- One INSERT statement per row, joined with ";".
         Requires allowMultiQueries=true on the JDBC URL. -->
    <insert id="batchInsert" parameterType="java.util.List">
        <foreach collection="list" item="item" separator=";" >
            insert into user
            <trim prefix="(" suffix=")" suffixOverrides=",">
                <if test="item.id != null">
                    id,
                </if>
                <if test="item.name != null">
                    `name`,
                </if>
                <if test="item.phone != null">
                    `phone`,
                </if>
            </trim>
            <trim prefix="values (" suffix=")" suffixOverrides=",">
                <if test="item.id != null">
                    <!-- id is a Java Long, so BIGINT, not INTEGER -->
                    #{item.id,jdbcType=BIGINT},
                </if>
                <if test="item.name != null">
                    #{item.name,jdbcType=VARCHAR},
                </if>
                <if test="item.phone != null">
                    #{item.phone,jdbcType=VARCHAR},
                </if>
            </trim>
        </foreach>
    </insert>

    <!-- Single-row statement used with an ExecutorType.BATCH session. -->
    <insert id="insertUsersBatch" parameterType="com.bear.dbinserttest.User">
        insert into user(name, phone) values (#{name}, #{phone})
    </insert>
</mapper>
6 application.yml 配置文件
url必须设置rewriteBatchedStatements=true,否则ExecutorType.BATCH插入的方式效率很低
server:
  port: 8080
spring:
  datasource:
    username: root
    password: 123456
    # rewriteBatchedStatements=true is required for efficient JDBC batching
    # (ExecutorType.BATCH); allowMultiQueries=true is required by the
    # ";"-separated multi-statement batchInsert mapper.
    url: jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=utf-8&useSSL=false&serverTimezone=UTC&rewriteBatchedStatements=true&allowMultiQueries=true
mybatis:
  mapper-locations: classpath:mapper/*.xml
7 DbinserttestApplicationTests 测试类
package com.bear.dbinserttest;
import org.junit.jupiter.api.Test;
import org.mybatis.spring.annotation.MapperScan;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
@SpringBootTest
class DbinserttestApplicationTests {
    @Autowired
    IUserService userService;
    @Test
    // Baseline: one INSERT statement per row.
    // NOTE(review): the timing comments below cite 100,000 rows, but UserService's
    // count field is 1,000,000 — confirm which configuration produced these numbers.
    void contextLoads() {
        userService.insertUser(); // reported: 100,000 rows in 74,895 ms
    }
    @Test
    // Multi-row <foreach> insert, flushed in chunks.
    void contextLoads2() {
        userService.insertUsers(); // reported: 100,000 rows in 1,801 ms
    }
    @Test
    // ExecutorType.BATCH insert (requires rewriteBatchedStatements=true).
    void contextLoads3() {
        userService.insertUsersBatch(); // reported: 100,000 rows in 1,143 ms
    }
}
8 小结
1.普通插入
默认的插入方式是遍历insert语句,单条执行,效率肯定低下,如果成堆插入,更是性能有问题。
INSERT INTO `table1` (`field1`, `field2`) VALUES ("data1", "data2");
INSERT INTO `table1` (`field1`, `field2`) VALUES ("data1", "data2");
INSERT INTO `table1` (`field1`, `field2`) VALUES ("data1", "data2");
INSERT INTO `table1` (`field1`, `field2`) VALUES ("data1", "data2");
INSERT INTO `table1` (`field1`, `field2`) VALUES ("data1", "data2");
可以看到每个语句的执行创建一个新的预处理语句,单条提交sql,性能低下.
2.foreach 优化插入
如果要优化插入速度时,可以将许多小型操作组合到一个大型操作中。理想情况下,这样可以在单个连接中一次性发送许多新行的数据,并将所有索引更新和一致性检查延迟到最后才进行。
INSERT INTO `table1` (`field1`, `field2`)
VALUES ("data1", "data2"),
("data1", "data2"),
("data1", "data2"),
("data1", "data2"),
("data1", "data2");
当表的列数较多(20+),以及一次性插入的行数较多(5000+)时,整个插入的耗时十分漫长,注意耗时在PreparedStatement特别长,包含了很多占位符,对于占位符和参数的映射尤其耗时
需要注意的点: MySQL 对单条语句的长度有限制(由 max_allowed_packet 参数控制,默认为 4M),超长会报错。
3.ExecutorType.BATCH插入
Mybatis内置的ExecutorType有3种,SIMPLE、REUSE、BATCH; 默认的是simple,该模式下它为每个语句的执行创建一个新的预处理语句,单条提交sql;而batch模式重复使用已经预处理的语句,并且批量执行所有更新语句,显然batch性能将更优;但batch模式也有自己的问题,比如在Insert操作时,在事务没有提交之前,是没有办法获取到自增的id,这在某些情形下是不符合业务要求的.
JDBC 在执行 SQL 语句时,会将 SQL 语句以及实参通过网络请求的方式发送到数据库,一次执行一条 SQL 语句,一方面会减小请求包的有效负载,另一个方面会增加耗费在网络通信上的时间。通过批处理的方式,我们就可以在 JDBC 客户端缓存多条 SQL 语句,然后在 flush 或缓存满的时候,将多条 SQL 语句打包发送到数据库执行,这样就可以有效地降低上述两方面的损耗,从而提高系统性能。
需要注意的点:进行jdbc批处理时需在JDBC的url中加入rewriteBatchedStatements=true
ExecutorType.BATCH原理:把SQL语句发给数据库,数据库预编译好后等待需要运行的参数,接收到参数后一次执行;因此ExecutorType.BATCH模式下只打印一次SQL语句、只预编译一次sql,随后多次设置参数并批量执行.
经过以上三种方式分析,在插入大数据量时优先选择第三种方式,
ExecutorType.BATCH插入
参考文章:https://blog.csdn.net/qq_43486404/article/details/126637141