多线程处理 PDF 转 JPG 文件时,在生产环境遇到的内存溢出问题(Caused by: java.lang.OutOfMemoryError: Java heap space)

需求:将文件上传到阿里云 OSS 并返回文件 URL;如果上传的是 PDF 文件,则先把每一页转成 JPG 图片,再上传到 OSS 并返回各页图片的地址集合。Java 代码实现如下:
控制器

package com.myapestech.project.common;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.myapestech.project.common.result.Result;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;

import javax.annotation.Resource;
import java.io.IOException;

/**
 * REST controller that exposes the file-upload endpoints under {@code /api/oss/file}.
 *
 * @author Kra丶*
 * @apiNote Controller for file-related endpoints
 * @date 2023/5/3 21:37
 */
@RestController
@RequestMapping("/api/oss/file")
public class FileController {

    @Resource
    private FileService fileService;

    /**
     * Uploads a single multipart file and wraps the service result in a {@link Result}.
     *
     * @param file the multipart request part named {@code file}
     * @return a {@code Result} built from the original file name and the service's result map
     * @throws IOException if the service fails while reading or processing the upload
     */
    @PostMapping("/upload")
    public Result upload(@RequestParam("file") MultipartFile file) throws IOException {
        // Read the client-supplied name first, then delegate the actual upload to the service.
        final String originalName = file.getOriginalFilename();
        return Result.ok(originalName, fileService.uploadFile(file));
    }

    /**
     * Uploads several multipart files in one request.
     *
     * <p>NOTE(review): {@code FileService#uploadFiles} is not part of the service listing shown
     * alongside this controller; confirm that it exists and what it returns.
     *
     * @param files the multipart request parts named {@code files}
     * @return a {@code Result} labelled {@code "filesAddress"} carrying the service result
     * @throws JsonProcessingException declared for the service call
     */
    @PostMapping("/uploadFiles")
    public Result uploadFiles(@RequestParam("files") MultipartFile[] files)
            throws JsonProcessingException {
        return Result.ok("filesAddress", fileService.uploadFiles(files));
    }
}

业务实现:

package com.myapestech.project.common;

import com.aliyun.oss.OSSClient;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FilenameUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import static io.lettuce.core.internal.Futures.allOf;

@Slf4j
@Service
public class FileService {
    @Value("${oss.endpoint}")
    private String endpoint;
    @Value("${oss.bucketName}")
    private String bucketName;
    @Value("${oss.accessKeyId}")
    private String accessKeyId;
    @Value("${oss.accessKeySecret}")
    private String accessKeySecret;

    public Map<Object, Object> uploadFile(MultipartFile file) throws IOException {
        // 开始计时
        long startTime = System.nanoTime();
        Map<Object, Object> map = new HashMap<>();
        if (!"application/pdf".equalsIgnoreCase(file.getContentType())) {
            return uploadImage(file, map);
        }
        PDDocument pdf = null;
        try {
            pdf = PDDocument.load(file.getInputStream());
            PDFRenderer pdfRenderer = new PDFRenderer(pdf);
            ExecutorService executor =
                    Executors.newFixedThreadPool(10);
            List<CompletableFuture<String>> futures = new ArrayList<>(pdf.getNumberOfPages());
            for (int i = 0; i < pdf.getNumberOfPages(); ++i) {
                int finalI = i;
                CompletableFuture<String> future =
                        CompletableFuture.supplyAsync(
                                () -> {
                                    try {
                                        BufferedImage bim =
                                                pdfRenderer.renderImageWithDPI(finalI, 600, ImageType.RGB);
                                        if (bim != null) {
                                            String fileName =
                                                    UUID.randomUUID().toString().replace("-", "") + ".jpg";
                                            OSSClient ossClient = new OSSClient(endpoint, accessKeyId, accessKeySecret);
                                            ByteArrayOutputStream baos = new ByteArrayOutputStream();
                                            ImageIO.write(bim, "jpg", baos);
                                            InputStream inputStream = new ByteArrayInputStream(baos.toByteArray());
                                            ossClient.putObject(bucketName, fileName, inputStream);
                                            return "https://" + bucketName + "." + endpoint + "/" + fileName;
                                        }
                                    } catch (IOException e) {
                                        log.error("Error rendering page {} of PDF file", finalI, e);
                                    }
                                    return null;
                                },
                                executor);
                futures.add(future);
            }
            CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();
            int i = 1;
            for (CompletableFuture<String> future : futures) {
                try {
                    String url = future.get();
                    if (url != null) {
                        map.put(i++, url);
                    }
                } catch (InterruptedException | ExecutionException e) {
                    log.error("Error getting future result", e);
                }
            }
        } finally {
            if (pdf != null) {
                pdf.close();
            }
        }
        // 停止计时
        long endTime = System.nanoTime();
        // 计算执行时间
        long duration = (long) ((endTime - startTime) / 1000000.0);
        map.put("Execution time(ms):",duration);
        return map;
    }

    private Map<Object, Object> uploadImage(MultipartFile file, Map<Object, Object> map)
            throws IOException {
        OSSClient ossClient = null;
        try {
            ossClient = new OSSClient(endpoint, accessKeyId, accessKeySecret);
            String fileName =
                    UUID.randomUUID().toString().replace("-", "") + "." + FilenameUtils.getExtension(file.getOriginalFilename());
            ossClient.putObject(bucketName, fileName, file.getInputStream());
            map.put("url", "https://" + bucketName + "." + endpoint + "/" + fileName);
        } catch (IOException e) {
            log.error("Error uploading image", e);
            map.put("error", "上传失败");
        } finally {
            if (ossClient != null) {
                ossClient.shutdown();
            }
        }
        return map;
    }
}

由于本地环境是 Windows、16G 内存,所以本地测试时没有出现问题,如下图:
在这里插入图片描述
生产环境部署的时候脚本如下:

nohup java -Xms256m -Xmx256m -jar /X/X/X/xxx-2.6.6.jar  --spring.profiles.active=prod> xxx.out 2>&1 &

图片上传没有问题,遇到pdf文件上传就报错。查看生产环境日志发现异常

Caused by: java.lang.OutOfMemoryError: Java heap space

当时想到的解决思路是
1、可以尝试对上传的 PDF 文件进行分片(按页分批)处理:把每个分片渲染成独立的图片文件并上传到阿里云 OSS 之后,再处理下一个分片,以避免一次性加载、渲染整个 PDF 文件导致内存溢出。
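2、把生产环境的 JVM 堆内存(-Xms/-Xmx)调大到 768m。直接尝试了这种方式,问题得以解决。
补充说明:代码以 600 DPI 渲染页面,以 A4 页面为例约为 4960×7016 像素,按每像素 4 字节计算,单页位图约占 130 MB;再加上线程池允许 10 页同时渲染,256m 的堆很容易被耗尽。因此降低 DPI、限制同时渲染的页数,比单纯加大内存更能从根本上解决问题。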

若过路大神有别的更好的办法请不吝赐教…

评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值