背景
最近工作中遇到了解析excel,然后批量插入,发现这个插入时间比较长,所以想要进行一些优化,大家可以跳过过程直接看结论
背景
作)准备工作,创建一张测试表
CREATE TABLE `user` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`name` varchar(255) DEFAULT NULL,
`user_age` int(11) DEFAULT NULL COMMENT '年龄',
`master` int(11) DEFAULT NULL COMMENT '班主任',
`is_delete` int(2) DEFAULT NULL,
`is_deletet` int(2) DEFAULT NULL,
PRIMARY KEY (`id`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=614674 DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;
第一种foreach批量插入
<insert id="batchInsert">
insert into user(name, user_age, master, is_delete)
values
<foreach collection="entities" item="entity" separator=",">
(#{entity.name}, #{entity.userAge}, #{entity.master}, #{entity.isDelete})
</foreach>
</insert>
代码执行
public void testBatchInsert() throws InterruptedException {
List<User> list = new ArrayList<>();
for (int i = 0; i < 10000; i++) {
User user = new User();
user.setName("zhangsan"+i);
user.setUserAge(i);
user.setMaster(0);
user.setIsDelete(0);
list.add(user);
}
long start = System.currentTimeMillis();
userMapper.batchInsert(list);
System.out.println("花费时间:" + (System.currentTimeMillis() - start));
}
第二种开启事务,分批批量插入
@Transactional
public void testBatchInsert() throws InterruptedException {
List<User> list = new ArrayList<>();
for (int i = 0; i < 10000; i++) {
User user = new User();
user.setName("zhangsan"+i);
user.setUserAge(i);
user.setMaster(0);
user.setIsDelete(0);
list.add(user);
}
long start = System.currentTimeMillis();
List<User> temp = new ArrayList<>();
for (User user : list) {
temp.add(user);
if (temp.size() == 100) {
userMapper.batchInsert(temp);
temp.clear();
}
}
if (temp.size() > 0) {
userMapper.batchInsert(temp);
temp.clear();
}
System.out.println("花费时间:" + (System.currentTimeMillis() - start));
}
第三种ExecutorType.BATCH,单条插入
public void testBatchInsert() throws InterruptedException {
List<User> list = new ArrayList<>();
for (int i = 0; i < 10000; i++) {
User user = new User();
user.setName("zhangsan"+i);
user.setUserAge(i);
user.setMaster(0);
user.setIsDelete(0);
list.add(user);
}
long start = System.currentTimeMillis();
List<User> temp = new ArrayList<>();
SqlSession sqlSession = sqlSessionTemplate.getSqlSessionFactory().openSession(ExecutorType.BATCH, false);
UserMapper mapper = sqlSession.getMapper(UserMapper.class);
list.forEach(e -> mapper.insert(e));
sqlSession.commit();
System.out.println("花费时间:" + (System.currentTimeMillis() - start));
}
第四种,ExecutorType.BATCH,批量插入
public void testBatchInsert() throws InterruptedException {
List<User> list = new ArrayList<>();
for (int i = 0; i < 10000; i++) {
User user = new User();
user.setName("zhangsan"+i);
user.setUserAge(i);
user.setMaster(0);
user.setIsDelete(0);
list.add(user);
}
long start = System.currentTimeMillis();
List<User> temp = new ArrayList<>();
SqlSession sqlSession = sqlSessionTemplate.getSqlSessionFactory().openSession(ExecutorType.BATCH, false);
UserMapper mapper = sqlSession.getMapper(UserMapper.class);
mapper.batchInsert(list);
sqlSession.commit();
System.out.println("花费时间:" + (System.currentTimeMillis() - start));
}
第五种,原始jdbc,批量插入
@Test
public void testBatchInsertJDBC() throws InterruptedException, SQLException {
List<User> list = new ArrayList<>();
for (int i = 0; i < 10000; i++) {
User user = new User();
user.setName("zhangsan"+i);
user.setUserAge(i);
user.setMaster(0);
user.setIsDelete(0);
list.add(user);
}
long start = System.currentTimeMillis();
Connection connection = dataSource.getConnection();
connection.setAutoCommit(false);
Statement statement = connection.createStatement();
PreparedStatement ins = connection.prepareStatement("insert into user(name, user_age, master, is_delete) values (?, ?, ?, ?)");
for (int i = 1; i <= list.size(); i++) {
User user = list.get(i - 1);
ins.setString(1, user.getName());
ins.setInt(2, user.getUserAge());
ins.setInt(3, user.getMaster());
ins.setInt(4, user.getIsDelete());
ins.addBatch();
}
ins.executeBatch();
connection.commit();
System.out.println("花费时间:" + (System.currentTimeMillis() - start));
}
第六种,原始jdbc,批量分批插入
List<User> list = new ArrayList<>();
for (int i = 0; i < 10000; i++) {
User user = new User();
user.setName("zhangsan"+i);
user.setUserAge(i);
user.setMaster(0);
user.setIsDelete(0);
list.add(user);
}
long start = System.currentTimeMillis();
Connection connection = dataSource.getConnection();
connection.setAutoCommit(false);
Statement statement = connection.createStatement();
PreparedStatement ins = connection.prepareStatement("insert into user(name, user_age, master, is_delete) values (?, ?, ?, ?)");
for (int i = 1; i <= list.size(); i++) {
User user = list.get(i - 1);
ins.setString(1, user.getName());
ins.setInt(2, user.getUserAge());
ins.setInt(3, user.getMaster());
ins.setInt(4, user.getIsDelete());
ins.addBatch();
if (i % 50 == 0) {
int[] ints = ins.executeBatch();
}
}
connection.commit();
System.out.println("花费时间:" + (System.currentTimeMillis() - start));
}
数据
测试数据仅供参考,计算平均时忽略掉了一些异常数据,这里原始jdbc插入时间反而更久,代码可能有问题,也可能数据里比较小
结论
**推荐使用 第四种,ExecutorType.BATCH,批量插入 方式,这种方式看到网上说使用单条插入的帖子很多,经过测试使用单条插入效率极低,应该使用batchInsert 插入
也可以使用开启事务批量分批插入,不过需要寻找一个合适的批量值,和插入字段多少大小可能有关系。**