一、JodConverter是什么?
JodConverter,是一个Java的OpenDocument文件转换器,可以进行许多文件格式的转换。它依赖于OpenOffice.org或者LibreOffice提供的服务来进行转换,它能将Microsoft Office文档(Word,Excel,PowerPoint)转换为PDF格式。
二、使用步骤
1.依赖导入
<!-- JODConverter artifacts; all three are pinned to the same 4.2.0 release. -->
<dependency>
    <groupId>org.jodconverter</groupId>
    <artifactId>jodconverter-core</artifactId>
    <version>4.2.0</version>
</dependency>
<dependency>
    <groupId>org.jodconverter</groupId>
    <artifactId>jodconverter-local</artifactId>
    <version>4.2.0</version>
</dependency>
<dependency>
    <groupId>org.jodconverter</groupId>
    <artifactId>jodconverter-spring-boot-starter</artifactId>
    <version>4.2.0</version>
</dependency>
<!-- LibreOffice "ridl" artifact, declared explicitly alongside JODConverter. -->
<dependency>
    <groupId>org.libreoffice</groupId>
    <artifactId>ridl</artifactId>
    <version>5.4.2</version>
</dependency>
2.下载LibreOffice
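从LibreOffice官网(https://www.libreoffice.org/download/)下载并安装LibreOffice;安装目录需要与下文配置项 jodconverter.local.office-home 保持一致。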
3. 配置
# Local JODConverter settings: LibreOffice install directory, office process port,
# and the maximum number of tasks per office process.
jodconverter.local.enabled=true
jodconverter.local.office-home=C:\\Program Files\\LibreOffice\\
jodconverter.local.portNumbers=8100
jodconverter.local.maxTasksPerProcess=100
# Application-wide upload size limits
spring.http.multipart.maxFileSize=100Mb
spring.http.multipart.maxRequestSize=102Mb
# The placeholder must use the exact key defined below (file.baseLocalPath);
# placeholder lookup does not go through relaxed binding.
spring.http.multipart.location=${file.baseLocalPath}/temp
# URL prefix returned to the client after a file has been uploaded
file.baseRemotePath=/upload
# Local directory in which uploaded files are stored
file.baseLocalPath=/data/www/upload
3. 文件上传和转换
3.1 Service
@Service("uploadService")
public class UploadServiceImpl implements UploadService {
@Value("${file.baseRemotePath}")
private String baseRemotePath;
@Value("${file.baseLocalPath}")
private String baseLocalPath;
// 控制多次请求转换同一个PDF文件
private static List<String> urlTransfering = new ArrayList<>();
@Autowired
private AttachmentPDFDao attachmentPDFDao;
@Resource
private DocumentConverter documentConverter;
// 支持转换成pdf的文件后缀名
String[] acceptExtLst = {"xls", "xlsx", "doc", "docx"};
@Override
public List<Map<String, String>> uploadFiles(String ownerToken, MultipartFile[] files) throws IOException {
if (StrUtil.isEmpty(ownerToken)) {
throw new ServiceOperationException("token不能为空");
}
if (files == null || files.length == 0) {
throw new ServiceOperationException("file不能为空");
}
List<Map<String, String>> paths = Lists.newArrayList();
for (MultipartFile multipartFile : files) {
PathResolver resolver = new PathResolver(baseLocalPath, baseRemotePath, ownerToken, multipartFile.getOriginalFilename());
// 保存原始文件到服务器
File file = new File(resolver.getFullLocalPath());
multipartFile.transferTo(file);
// 生成文件
log.info("保存原始文件到服务器成功,文件={}", resolver.getFullRemotePath());
Map<String, String> pathMap = Maps.newLinkedHashMap();
pathMap.put(ImageEnum.IMG_ORIGINAL.getName(), resolver.getFullRemotePath());
paths.add(pathMap);
}
log.info("文件服务器返回结果={}", JSON.toJSONString(paths, true));
return paths;
}
@Override
public List<Map<String, String>> filesToPdf(String ownerToken, String attachment) {
// 当前文件转换的队列
log.info("转换PDF队列,urlTransfering={}", urlTransfering);
// token (不需要token校验的可以把token移除)
if (StrUtil.isEmpty(ownerToken)) {
throw new ServiceOperationException("token不能为空");
}
if (StrUtil.isEmpty(attachment)) {
throw new ServiceOperationException("attachment不能为空");
}
if (urlTransfering.contains(attachment)) {
// 当前文件已经在转换中
return Lists.newArrayList();
}
List<Map<String, String>> paths = Lists.newArrayList();
// 获取文件类型
String suffix = StrUtil.subAfter(attachment, ".", true);
// 校验文件类型是否在指定的后缀中,存在则直接查询数据库中文档对应的PDF文档地址
if (Arrays.asList(acceptExtLst).contains(suffix)) {
List<AttachmentPDF> attachmentPDFList = attachmentPDFDao.findAll(new Specification<AttachmentPDF>() {
@Override
public Predicate toPredicate(Root<AttachmentPDF> root, CriteriaQuery<?> query, CriteriaBuilder cb) {
return cb.equal(root.<String>get("attachment"), attachment);
}
});
// 文件转换
if (CollUtil.isEmpty(attachmentPDFList)) {
// 启动线程异步转换
ThreadUtil.execAsync(() -> {
try {
// 将文件放进转换队列中
urlTransfering.add(attachment);
File fromFile = FileUtil.newFile(baseLocalPath + attachment.replace("/upload/", "/"));
PathResolver resolverPDF = new PathResolver(baseLocalPath, baseRemotePath, ownerToken, "123.pdf"); // 只取后缀,可任意定义文件名
File filePDF = FileUtil.newFile(resolverPDF.getFullLocalPath());
//核心转换方法
documentConverter.convert(fromFile).as(DefaultDocumentFormatRegistry.getFormatByExtension(suffix)).to(filePDF).as(DefaultDocumentFormatRegistry.PDF).execute();
log.info("转换PDF,文件={}", resolverPDF.getFullRemotePath());
// 往数据库插入记录
AttachmentPDF entity = new AttachmentPDF();
entity.setAttachment(attachment);
entity.setPdfPath(resolverPDF.getFullRemotePath());
attachmentPDFDao.save(entity);
log.info("往数据库插入记录成功,attachment={}", JSON.toJSONString(attachment));
} catch (OfficeException e) {
log.error("转换PDF异常", e);
} finally {
urlTransfering.remove(attachment);
}
});
} else {
Map<String, String> pathMap = Maps.newLinkedHashMap();
pathMap.put(ImageEnum.IMG_ORIGINAL.getName(), attachmentPDFList.get(0).getPdfPath());
paths.add(pathMap);
}
}
log.info("文件服务器返回结果={}", JSON.toJSONString(paths, true));
return paths;
}
}
}
3.2 PDF实体
/**
 * JPA entity mapped to the {@code attachment_pdf} table; each row pairs an
 * attachment path with the path of the PDF generated from it.
 */
@Data
@Entity
@Table(name = "attachment_pdf")
public class AttachmentPDF implements Serializable {

    private static final long serialVersionUID = 10003L;

    /** Primary key, generated with the IDENTITY strategy (auto-increment). */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Column(name = "id")
    private Integer id;

    /** Path of the attachment. */
    @Column(name = "attachment")
    private String attachment;

    /** Path of the PDF. */
    @Column(name = "pdf_path")
    private String pdfPath;
}
3.3 文件路径分解类封装
import java.io.File;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDate;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.Date;
import java.util.UUID;

import com.google.common.io.Files;
import org.apache.http.client.utils.DateUtils;
/**
 * Computes where a file is stored locally and under which remote path it is exposed.
 *
 * <p>The layout is {@code <base>/<ownerToken>/<yyyyMM>/<generated name>[.<extension>]}.
 * The file name is a random UUID without dashes; only the extension is taken from the
 * original file name. Constructing an instance creates the local directory if needed.
 */
public class PathResolver {

    /** Pattern of the per-month directory name. */
    private static final DateTimeFormatter MONTH_DIR_FORMAT = DateTimeFormatter.ofPattern("yyyyMM");

    /** Remote base path. */
    final String baseRemotePath;

    /** Local base path. */
    final String baseLocalPath;

    /** Full remote path. */
    final String fullRemotePath;

    /** Full local path. */
    final String fullLocalPath;

    /** Owner of the file. */
    final String ownerToken;

    /** Generated file name (without extension). */
    final String fileName = UUID.randomUUID().toString().replace("-", "");

    /** File extension, without the dot; empty when the original name has none. */
    final String extName;

    /** Name of the date directory. */
    final String dateDir;

    /**
     * @param baseLocalPath    local base directory
     * @param baseRemotePath   remote base path
     * @param ownerToken       owner of the file; used as a directory name
     * @param originalFilename original file name; only its extension is used, may be null
     * @throws IllegalArgumentException if {@code ownerToken} makes the target directory
     *                                  resolve outside {@code baseLocalPath}
     * @throws IllegalStateException    if the target directory cannot be created
     */
    public PathResolver(String baseLocalPath, String baseRemotePath, String ownerToken, String originalFilename) {
        this.baseLocalPath = baseLocalPath;
        this.baseRemotePath = baseRemotePath;
        this.ownerToken = ownerToken;
        this.extName = extensionOf(originalFilename);
        // UTC, matching the GMT-based formatting of the previously used date helper.
        this.dateDir = MONTH_DIR_FORMAT.format(LocalDate.now(ZoneOffset.UTC));

        Path directory = Paths.get(baseLocalPath, ownerToken, dateDir);
        // Reject tokens such as "../.." before anything is created on disk.
        requireInsideBase(baseLocalPath, directory);
        String directoryPath = directory.toString();
        ensureDirExists(directoryPath);

        String storedName = getFileName("");
        // Full local path.
        this.fullLocalPath = Paths.get(directoryPath, storedName).toString();
        // Full remote path.
        this.fullRemotePath = getRemotePath(baseRemotePath, ownerToken, dateDir, storedName);
    }

    /** Returns the text after the last dot of the last path segment, or "" if there is none. */
    private static String extensionOf(String originalFilename) {
        if (originalFilename == null) {
            return "";
        }
        String lastSegment = new File(originalFilename).getName();
        int dotIndex = lastSegment.lastIndexOf('.');
        return dotIndex < 0 ? "" : lastSegment.substring(dotIndex + 1);
    }

    /** Throws if {@code directory}, once normalized, is not located under the base directory. */
    private static void requireInsideBase(String baseLocalPath, Path directory) {
        Path base = Paths.get(baseLocalPath).toAbsolutePath().normalize();
        if (!directory.toAbsolutePath().normalize().startsWith(base)) {
            throw new IllegalArgumentException("ownerToken resolves outside the base directory");
        }
    }

    /** Builds the stored file name; the dot is omitted when there is no extension. */
    private String getFileName(String suffix) {
        String stem = this.fileName + suffix;
        return this.extName.isEmpty() ? stem : stem + "." + this.extName;
    }

    /**
     * Joins the segments with "/" and appends them to the base. Doubled slashes are
     * collapsed in the joined segments only, so the base is left exactly as configured.
     */
    private String getRemotePath(String base, String... segments) {
        if (segments.length == 0) {
            return base;
        }
        StringBuilder joined = new StringBuilder();
        for (String segment : segments) {
            joined.append("/").append(segment);
        }
        return base + joined.toString().replace("//", "/");
    }

    /** Creates the directory (and its parents) if missing; fails loudly when that is impossible. */
    private void ensureDirExists(String dirPath) {
        File dirFile = new File(dirPath);
        // The second isDirectory() covers a concurrent creation by another thread.
        if (!dirFile.isDirectory() && !dirFile.mkdirs() && !dirFile.isDirectory()) {
            throw new IllegalStateException("cannot create directory: " + dirPath);
        }
    }

    /** @return true if the extension is jpg, gif, bmp, jpeg or png (case-insensitive) */
    public boolean isImage() {
        return "jpg".equalsIgnoreCase(extName)
                || "gif".equalsIgnoreCase(extName)
                || "bmp".equalsIgnoreCase(extName)
                || "jpeg".equalsIgnoreCase(extName)
                || "png".equalsIgnoreCase(extName);
    }

    public String getFullLocalPath() {
        return this.fullLocalPath;
    }

    public String getFullRemotePath() {
        return this.fullRemotePath;
    }

    /** @return the generated file name, without extension */
    public String getFileName() {
        return fileName;
    }
}
注意
如果需要把文件上传和转PDF的服务打包上服务器时,在使用Docker做容器挂载的时候,有可能会找不到LibreOffice,最好是直接启动jar包
服务在docker做容器挂载:
宿主机的/data/logs目录和容器中的 /usr/local/logs 目录相关联,这两个目录中的文件会互相同步
docker run -d --name project_web_dev -p 8080:8080 -v /data/logs:/usr/local/logs <镜像名>
直接启动jar包
java -jar project_web_dev.jar