EasyExcel实现百万数据批量导出

当数据量比较大时,例如数据量达到百万级,传统的一次读取到内存中在写入excel文件的方法便不再适用了,可能会导致内存溢出;而且一次性将数据写入一张sheet工作表也不太好。
但我们可以选择数据分片的方式批量写入多个工作表。
测试数据100w条,写入到Excel表中,分成5个sheet表,每个sheet表20w条。

1.引入依赖

pom.xml

    <dependencies>
        <!-- web -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        
        <!-- mybatis -->
        <dependency>
            <groupId>org.mybatis.spring.boot</groupId>
            <artifactId>mybatis-spring-boot-starter</artifactId>
            <version>2.2.2</version>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>

        <!-- alibaba easyexcel -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>easyexcel</artifactId>
            <version>3.0.5</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.poi</groupId>
                    <artifactId>poi</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.poi</groupId>
                    <artifactId>poi-ooxml</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.poi</groupId>
                    <artifactId>poi-ooxml-schemas</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- lombok -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.24</version>
        </dependency>
        <!-- mybatis-plus -->
        <dependency>
            <groupId>com.baomidou</groupId>
            <artifactId>mybatis-plus-boot-starter</artifactId>
            <version>3.5.3</version>
        </dependency>
        <!-- jdbc -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-jdbc</artifactId>
        </dependency>

        <!--Mysql依赖包-->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
        </dependency>
        <!-- hutool -->
        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-all</artifactId>
            <version>5.7.16</version>
        </dependency>
    </dependencies>

2. Controller/Service/Mapper

实体类User

import com.alibaba.excel.annotation.ExcelProperty;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.io.Serializable;
import java.time.LocalDateTime;


@NoArgsConstructor
@AllArgsConstructor
@Data
@TableName("user")
public class User implements Serializable {

    @TableId("id")
    @ExcelProperty("id")
    private Long id;

    @ExcelProperty("username")
    private String username;

    @ExcelProperty("password")
    private String password;

    @ExcelProperty("email")
    private String email;

    @ExcelProperty("phoneNumber")
    private String phoneNumber;

    @ExcelProperty("createdAt")
    private LocalDateTime createdAt;

    @ExcelProperty(value = "updatedAt")
    private LocalDateTime updatedAt;

}

UserCtrl


import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.lang.UUID;
import com.alibaba.excel.EasyExcel;
import com.alibaba.excel.ExcelWriter;
import com.alibaba.excel.write.metadata.WriteSheet;
import com.alibaba.excel.write.style.column.LongestMatchColumnWidthStyleStrategy;
import com.example.excel.config.LocalDateTimeConverter;
import com.example.excel.domain.User;
import com.example.excel.mapper.UserMapper;
import com.example.excel.util.ExcelConstants;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Executor;

/**
 * @author: hong.jian
 * 09-28 11:37
 */
@RestController
@RequiredArgsConstructor
@Slf4j
@RequestMapping("/export")
public class UserCtrl {


    private final UserMapper userMapper;
    private final Executor exportThreadPoolExecutor;
}

UserService 和 UserServiceImpl

public interface UserService extends IService<User> {



}


@Service
@RequiredArgsConstructor
public class UserServiceImpl extends ServiceImpl<UserMapper, User> implements UserService {
    private final UserMapper userMapper;



}

UserMapper


@Mapper
public interface UserMapper extends BaseMapper<User> {

    List<User> selfSelectList(@Param("currentPage") int currentPage, @Param("pageSize") int pageSize);

}

UserMapper.xml

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
        "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.example.excel.mapper.UserMapper">
   <!-- 使用覆盖索引处理深度分页 -->
    <select id="seleSelectList"  resultType="com.example.excel.domain.User">
        SELECT
            u.id,
            u.username,
            u.PASSWORD,
            u.email,
            u.phone_number,
            u.created_at,
            u.updated_at
        FROM
            USER u,
            ( SELECT t.id FROM USER t ORDER BY t.id LIMIT #{currentPage}, #{pageSize} ) AS d
        WHERE
            u.id = d.id;
    </select>


</mapper>

LocalDateTime时间字段转换器

import com.alibaba.excel.converters.Converter;
import com.alibaba.excel.metadata.GlobalConfiguration;
import com.alibaba.excel.metadata.data.WriteCellData;
import com.alibaba.excel.metadata.data.ReadCellData;
import com.alibaba.excel.metadata.property.ExcelContentProperty;

import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;

/**
 * 自定义LocalDateTime时间字段类型转换器
 */
public class LocalDateTimeConverter implements Converter<LocalDateTime> {
	private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

	@Override
	public Class<LocalDateTime> supportJavaTypeKey() {
		return LocalDateTime.class;
	}

	@Override
	public WriteCellData<?> convertToExcelData(LocalDateTime value, ExcelContentProperty contentProperty, GlobalConfiguration globalConfiguration) {
		if (value != null) {
			return new WriteCellData<>(value.format(FORMATTER));
		}
		return new WriteCellData<>("");
	}

	@Override
	public LocalDateTime convertToJavaData(ReadCellData<?> cellData, ExcelContentProperty contentProperty, GlobalConfiguration globalConfiguration) {
		if (cellData.getStringValue() != null) {
			return LocalDateTime.parse(cellData.getStringValue(), FORMATTER);
		}
		return null;
	}
}

3. 编写Controller方法

    @RequestMapping("/test")
    public void export(HttpServletResponse response) throws IOException {
        log.info("*********统计查询列表导出开始!**************");
        long start = System.currentTimeMillis();
        // 文件名
        String fileName = String.valueOf(UUID.randomUUID());
        OutputStream outputStream = null;
        try {
            // 总记录数:实际中需要根据查询条件进行统计即可:一共多少条
            long totalCount = userMapper.selectCount(null);
            // 每一个Sheet存放20w条数据
            Integer sheetDataRows = ExcelConstants.PER_SHEET_ROW_COUNT;
            // 每次写入的数据量10w,每页查询10W
            Integer writeDataRows = ExcelConstants.PER_WRITE_ROW_COUNT;
            // 计算需要的Sheet数量
            long sheetNum = totalCount % sheetDataRows == 0 ? (totalCount / sheetDataRows) : (totalCount / sheetDataRows + 1);
            // 计算一般情况下每一个Sheet需要写入的次数(一般情况不包含最后一个sheet,因为最后一个sheet不确定会写入多少条数据)
            Integer oneSheetWriteCount = sheetDataRows / writeDataRows;
            // 计算最后一个sheet需要写入的次数
            long lastSheetWriteCount = totalCount % sheetDataRows == 0 ? oneSheetWriteCount : (totalCount % sheetDataRows % writeDataRows == 0 ?
                    ((totalCount % sheetDataRows) / writeDataRows) : ((totalCount % sheetDataRows) / writeDataRows + 1));
            outputStream = response.getOutputStream();
            // 必须放到循环外,否则会刷新流
            ExcelWriter excelWriter = EasyExcel.write(outputStream).build();
            // 开始分批查询分次写入
            for (int i = 0; i < sheetNum; i++) {
                log.info("*********统计查询列表第" + (i + 1) + "页导出开始!**************");
                // 循环写入次数: j的自增条件是当不是最后一个Sheet的时候写入次数为正常的每个Sheet写入的次数,如果是最后一个就需要使用计算的次数lastSheetWriteCount
                for (int j = 0; j < (i != sheetNum - 1 ? oneSheetWriteCount : lastSheetWriteCount); j++) {
                    // 分页查询一次10w
                    // 设置偏移量
                    int currentPage = (j + oneSheetWriteCount * i) * writeDataRows;
                    // 设置每页大小
                    int pageSize = writeDataRows;
                    // 使用覆盖索引避免深度分页
                    List<User> userList = userMapper.selfSelectList(currentPage, pageSize);
                    if (CollUtil.isEmpty(userList)) {  // 记录集合判空
                        continue;
                    }
                    // 写入到excel:
                    // 这里可以通过设置includeColumnFiledNames、excludeColumnFiledNames导出什么字段,可以动态配置,前端传过来那些列,就导出那些列
                    WriteSheet writeSheet = EasyExcel.writerSheet(i, "Sheet" + (i + 1))
                            .head(User.class)
                            .registerWriteHandler(new LongestMatchColumnWidthStyleStrategy())   // 自动调整列宽
                            .registerConverter(new LocalDateTimeConverter())    // 时间字段转换器
                            // .includeColumnFiledNames()  // 只导出指定字段集合
                            // .excludeColumnFiledNames()  // 排除指定字段集合
                            .build();
                    excelWriter.write(userList, writeSheet);
                }
                log.info("*********统计查询列表第" + (i + 1) + "页导出结束!**************");
            }
            // 下载EXCEL,返回给前端stream流
            response.setContentType("application/octet-stream");
            response.setCharacterEncoding("utf-8");
            response.setHeader("Content-disposition", "attachment;filename*=utf-8''" + fileName + ".xlsx");
            excelWriter.finish();
            outputStream.flush();
            log.info("*********统计查询列表导出结束!**************");
            log.info("耗时:" + (System.currentTimeMillis() - start));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (outputStream != null) {
                outputStream.close();
            }
        }

    }

4. 测试

100w条数据导出约27s,还是有点慢。

9-28 17:49:27.847  INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl    : *********统计查询列表导出开始!**************
2024-09-28 17:49:28.744  INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl    : *********统计查询列表第1页导出开始!**************
2024-09-28 17:49:32.857  INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl    : *********统计查询列表第1页导出结束!**************
2024-09-28 17:49:32.857  INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl    : *********统计查询列表第2页导出开始!**************
2024-09-28 17:49:36.955  INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl    : *********统计查询列表第2页导出结束!**************
2024-09-28 17:49:36.955  INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl    : *********统计查询列表第3页导出开始!**************
2024-09-28 17:49:41.007  INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl    : *********统计查询列表第3页导出结束!**************
2024-09-28 17:49:41.007  INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl    : *********统计查询列表第4页导出开始!**************
2024-09-28 17:49:45.109  INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl    : *********统计查询列表第4页导出结束!**************
2024-09-28 17:49:45.109  INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl    : *********统计查询列表第5页导出开始!**************
2024-09-28 17:49:49.272  INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl    : *********统计查询列表第5页导出结束!**************
2024-09-28 17:49:55.630  INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl    : *********统计查询列表导出结束!**************
2024-09-28 17:49:55.631  INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl    : 耗时:27784

5. 思考

引入线程池

    @Bean
    public Executor exportThreadPoolExecutor(){
        return  new ThreadPoolExecutor(
                5,
                10,
                10,
                TimeUnit.SECONDS,
                new ArrayBlockingQueue<>(100),
                new ThreadPoolExecutor.CallerRunsPolicy()
        );
    }

并在UserController中注入

使用线程池优化代码

    @RequestMapping("/test2")
    public void export2(HttpServletResponse response) throws IOException {
        log.info("*********统计查询列表导出开始!**************");
        long start = System.currentTimeMillis();
        String fileName = String.valueOf(UUID.randomUUID());
        OutputStream outputStream = null;
        try {
            long totalCount = userMapper.selectCount(null);
            Integer sheetDataRows = ExcelConstants.PER_SHEET_ROW_COUNT;
            Integer writeDataRows = ExcelConstants.PER_WRITE_ROW_COUNT;
            long sheetNum = (totalCount + sheetDataRows - 1) / sheetDataRows;
            Integer oneSheetWriteCount = sheetDataRows / writeDataRows;
            long lastSheetWriteCount = totalCount % sheetDataRows == 0 ? oneSheetWriteCount :
                    (totalCount % sheetDataRows + writeDataRows - 1) / writeDataRows;
            outputStream = response.getOutputStream();
            ExcelWriter excelWriter = EasyExcel.write(outputStream).build();
            // 信号枪,配合多线程实现并发编程
            CountDownLatch latch = new CountDownLatch((int) sheetNum);
            for (int i = 0; i < sheetNum; i++) {
                int finalI = i;
                CompletableFuture.runAsync(() -> {
                    log.info("*********统计查询列表第" + (finalI + 1) + "页导出开始!**************");
                    WriteSheet writeSheet = EasyExcel.writerSheet(finalI, "Sheet" + (finalI + 1))
                            .head(User.class)
                            .registerWriteHandler(new LongestMatchColumnWidthStyleStrategy())
                            .registerConverter(new LocalDateTimeConverter())
                            .build();
                    try {
                        for (int j = 0; j < (finalI != sheetNum - 1 ? oneSheetWriteCount : lastSheetWriteCount); j++) {
                            int currentPage = (j + oneSheetWriteCount * finalI) * writeDataRows;
                            List<User> userList = userMapper.selfSelectList(currentPage, writeDataRows);

                            if (CollUtil.isEmpty(userList)) {
                                continue;
                            }
                            // 加锁实现互斥写入,确保线程安全
                            synchronized (excelWriter) {
                                excelWriter.write(userList, writeSheet);
                            }
                        }
                    } finally {
                        latch.countDown();
                        log.info("*********统计查询列表第" + (finalI + 1) + "页导出结束!**************");
                    }
                }, exportThreadPoolExecutor);   // 尝试引入多线程
            }
            latch.await();
            response.setContentType("application/octet-stream");
            response.setCharacterEncoding("utf-8");
            response.setHeader("Content-disposition", "attachment;filename*=utf-8''" + fileName + ".xlsx");
            excelWriter.finish();
            outputStream.flush();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (outputStream != null) {
                outputStream.close();
            }
        }
        log.info("*********统计查询列表导出结束!**************");
        log.info("耗时:" + (System.currentTimeMillis() - start));
    }

再次测试,耗时约20s

2024-09-28 18:05:54.239  INFO 53952 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl    : *********统计查询列表导出开始!**************
2024-09-28 18:05:55.132  INFO 53952 --- [pool-1-thread-2] com.example.excel.controller.UserCtrl    : *********统计查询列表第2页导出开始!**************
2024-09-28 18:05:55.132  INFO 53952 --- [pool-1-thread-3] com.example.excel.controller.UserCtrl    : *********统计查询列表第3页导出开始!**************
2024-09-28 18:05:55.132  INFO 53952 --- [pool-1-thread-4] com.example.excel.controller.UserCtrl    : *********统计查询列表第4页导出开始!**************
2024-09-28 18:05:55.132  INFO 53952 --- [pool-1-thread-5] com.example.excel.controller.UserCtrl    : *********统计查询列表第5页导出开始!**************
2024-09-28 18:05:55.132  INFO 53952 --- [pool-1-thread-1] com.example.excel.controller.UserCtrl    : *********统计查询列表第1页导出开始!**************
2024-09-28 18:06:04.341  INFO 53952 --- [pool-1-thread-3] com.example.excel.controller.UserCtrl    : *********统计查询列表第3页导出结束!**************
2024-09-28 18:06:05.483  INFO 53952 --- [pool-1-thread-4] com.example.excel.controller.UserCtrl    : *********统计查询列表第4页导出结束!**************
2024-09-28 18:06:06.591  INFO 53952 --- [pool-1-thread-5] com.example.excel.controller.UserCtrl    : *********统计查询列表第5页导出结束!**************
2024-09-28 18:06:07.795  INFO 53952 --- [pool-1-thread-1] com.example.excel.controller.UserCtrl    : *********统计查询列表第1页导出结束!**************
2024-09-28 18:06:08.871  INFO 53952 --- [pool-1-thread-2] com.example.excel.controller.UserCtrl    : *********统计查询列表第2页导出结束!**************
2024-09-28 18:06:14.907  INFO 53952 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl    : *********统计查询列表导出结束!**************
2024-09-28 18:06:14.907  INFO 53952 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl    : 耗时:20667

可以发现,性能有提升,但提升不是很明显。我个人理解是为了避免多个线程同时写入excel产生的线程安全问题,采用了加锁的方式,确保线程间使用excelwriter写入excel文件的操作是互斥的,这导致性能收到了影响。

参考链接:使用EasyExcel实现excel导出,支持百万大数据量导出

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值