声明:我的工程是父子工程聚合
- 父工程为server,子工程为browser,auth,mbg,common
- browser继承了common,mbg,auth
- auth继承了mbg,common
1.需要依赖
<!--父工程中-->
<!--版本管理-->
<cpdetector.version>1.0.7</cpdetector.version>
<javacsv.version>2.0</javacsv.version>
<streamer.version>1.2.0</streamer.version>
<!--内容管理-->
<!--第三方cpdetector获取文件编码格式-->
<dependency>
<groupId>cpdetector</groupId>
<artifactId>cpdetector</artifactId>
<version>${cpdetector.version}</version>
</dependency>
<!--csv-->
<dependency>
<groupId>net.sourceforge.javacsv</groupId>
<artifactId>javacsv</artifactId>
<version>${javacsv.version}</version>
</dependency>
<!--读取大型xlsx文件工具类-->
<dependency>
<groupId>com.monitorjbl</groupId>
<artifactId>xlsx-streamer</artifactId>
<version>${streamer.version}</version>
</dependency>
<!--子工程中-->
<!--第三方cpdetector获取文件编码格式,common需要-->
<dependency>
<groupId>cpdetector</groupId>
<artifactId>cpdetector</artifactId>
</dependency>
<!--读取大型xlsx文件工具类,common和browser都需要-->
<dependency>
<groupId>com.monitorjbl</groupId>
<artifactId>xlsx-streamer</artifactId>
</dependency>
<!--csv browser需要-->
<dependency>
<groupId>net.sourceforge.javacsv</groupId>
<artifactId>javacsv</artifactId>
</dependency>
<!--需要仓库,丢在依赖下方,和依赖组平级(我丢在了common工程里)-->
<repositories>
<repository>
<id>ebi</id>
<name>www.ebi.ac.uk</name>
<url>http://www.ebi.ac.uk/intact/maven/nexus/content/groups/public/</url>
</repository>
</repositories>
2.controller和serviceImpl
package com.browser.controller;
import com.browser.service.IMarksetFileService;
import com.browser.vo.AuditFileParam;
import com.common.result.Result;
import com.common.result.ResultCode;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.security.core.context.SecurityContextHolder;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import java.util.List;
@RestController
@RequestMapping("/markset")
@Api(tags = "MarksetController", description = "标注集管理")
public class MarksetController {
@Autowired
private IMarksetFileService marksetFileService;
@PostMapping("/file/upload")
@ApiOperation("对数据集上传文件")
public Result uploadFile(MultipartFile file, @RequestParam Integer marksetId) {
Result result = marksetFileService.upload(file, marksetId);
return result;
}
@ApiOperation("根据id预览文件")
@GetMapping("/file/preview")
public Result previewFile(@RequestParam Integer fileId, @RequestParam(defaultValue = "10") Integer pageSize) {
if (fileId == null) {
return new Result(ResultCode.PARAM_LESS);
}
List data = marksetFileService.previewFile(fileId, pageSize);
if (data == null) {
return new Result(ResultCode.FAILED);
}
return new Result(data);
}
@ApiOperation("导入")
@PutMapping("/auditFile")
public Result auditFile(@RequestBody AuditFileParam param) {
Integer count = marksetFileService.auditFile(param);
if (count == -3) {
return new Result(ResultCode.FILE_PATH_NOTFOUND);
} else if (count == -4) {
return new Result(ResultCode.FILE_IS_NULL);
} else if (count >= 0 && param.getStatus().equals("2")) {//status:未通过,操作成功
return new Result();
} else if (count >= 0 && param.getStatus().equals("1")) {//status:通过,操作成功
return new Result(1, "当前数据中共" + count + "条数据!");
}
return new Result(ResultCode.FAILED);
}
}
//实现类
package com.browser.service.impl;
import com.csvreader.CsvReader;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.browser.vo.AuditFileParam;
import com.browser.vo.ExcelColumnParam;
import com.common.exception.CommonException;
import com.common.model.FilePath;
import com.common.result.Result;
import com.common.result.ResultCode;
import com.common.utils.FileUtil;
import com.common.utils.POIUtils;
import com.mbg.mapper.MarksetDataMapper;
import com.mbg.model.MarksetData;
import com.mbg.model.MarksetFile;
import com.mbg.mapper.MarksetFileMapper;
import com.browser.service.IMarksetFileService;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.DateUtils;
import org.apache.poi.ss.usermodel.Row;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.util.DigestUtils;
import org.springframework.web.multipart.MultipartFile;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
/**
* <p>
* 服务实现类
* </p>
*
* @author hnj
* @since 2021-06-17
*/
@Service
public class MarksetFileServiceImpl extends ServiceImpl<MarksetFileMapper, MarksetFile> implements IMarksetFileService {
@Autowired
private MarksetFileMapper marksetFileMapper;
@Override
public Result upload(MultipartFile file, Integer markserId) {
if (file == null || markserId == null) {
return new Result(ResultCode.PARAM_LESS);
}
//文件名称重命名
String oldName = file.getOriginalFilename();
if (!oldName.endsWith("csv") && !oldName.endsWith("xlsx")) {//只允许这两种结尾的文件
return new Result(ResultCode.FILE_FORMAT_WRONG);
}
InputStream inputStream = null;
try {
String suffix = oldName.substring(oldName.lastIndexOf("."));
String newName = System.currentTimeMillis() + suffix;
inputStream = file.getInputStream();
//动态获取excel文件存放位置
String filePath = FileUtil.getByPath(FilePath.UPLOAD_EXCEL_PATH);
//创建文件夹
Path directory = Paths.get(filePath);
if (!Files.exists(directory)) {
Files.createDirectories(directory);
}
Long size = Files.copy(inputStream, directory.resolve(newName));//上传文件,返回大小
String path = filePath + File.separator + newName;
String fileCharset = FileUtil.getFileEncode(path);//获取文件格式
String downloadPath = "/upload/download/" + newName;
MarksetFile marksetFile = new MarksetFile();
marksetFile.setMarksetId(markserId);
marksetFile.setPath(path);
marksetFile.setDownloadPath(downloadPath);
marksetFile.setCharset(fileCharset);
marksetFile.setSize(size.intValue());
marksetFile.setDelStatus("0");
marksetFile.setExt(suffix);
marksetFile.setCreateTime(DateUtils.round(new Date(), Calendar.SECOND));
marksetFile.setName(newName);
marksetFile.setStatus("0");
Integer count = marksetFileMapper.insert(marksetFile);
if (count > 0) {
return new Result();
}
return new Result(ResultCode.FAILED);
} catch (Exception e) {
return new Result(e);
} finally {
try {
if (inputStream != null) {
inputStream.close();
}
} catch (IOException e) {
throw new CommonException(e.getMessage());
}
}
}
@Override
public List previewFile(Integer fileId, Integer pageSize) {
if (pageSize == null) {
pageSize = 10;
}
List list = new ArrayList<>();
if (fileId == null || pageSize == null) {
return list;
}
//根据文件id查询文件路径
MarksetFile marksetFile = marksetFileMapper.selectById(fileId);
if (marksetFile == null) {
return list;
}
String path = marksetFile.getPath();
if (StringUtils.isBlank(path) || !new File(path).exists()) {
return null;
}
//获取编码格式
String charset = marksetFile.getCharset();
if (StringUtils.isBlank(charset)) {
return list;
}
if (path.endsWith("csv")) { //csv格式
CsvReader csvReader = null;
try {
csvReader = new CsvReader(path, ',', Charset.forName(charset));
Integer num = 0;
while (csvReader.readRecord()) {
Integer columnCount = csvReader.getColumnCount();
num++;
if (num == pageSize + 2) {
return list;
}
List dataList = new ArrayList<>();
if (columnCount != null && columnCount > 0) {
for (int i = 0; i < columnCount; i++) {
String data = csvReader.get(i);
dataList.add(data);
}
}
list.add(dataList);
}
} catch (FileNotFoundException e) {
throw new CommonException(e.getMessage());
} catch (IOException e) {
throw new CommonException(e.getMessage());
} finally {
if (csvReader != null) {
csvReader.close();
}
}
} else if (path.endsWith("xlsx")) {//excel格式
list = POIUtils.readBigFile(path, pageSize);
}
return list;
}
/**
* 导入
*/
@Autowired
private MarksetDataMapper marksetDataMapper;
@Override
@Transactional(rollbackFor = Exception.class)
public Integer auditFile(AuditFileParam param) {
//通过文件id获取到文件状态,包括文件对应的数据集id
Integer fileId = param.getFileId();
if (fileId == null) {
return -1;
}
MarksetFile marksetFile = marksetFileMapper.selectById(fileId);
if (marksetFile == null) {
return -1;
}
List<ExcelColumnParam> columns = param.getList();
if (columns == null || columns.size() == 0) {
return -1;
} else {//通过
Integer marksetId = marksetFile.getMarksetId();
if (marksetId == null) {
return -1;
}
String charset = marksetFile.getCharset();
//获取文件存储路径
String path = marksetFile.getPath();
if (StringUtils.isBlank(path) || !new File(path).exists()) {
return -3;
}
List<MarksetData> list = new ArrayList<>();
if (path.endsWith("csv")) {
list = readCSV(path, charset, columns, marksetId);
} else if (path.endsWith("xlsx")) {
list = readXLSX(path, columns, marksetId);
}
int size = list.size();
if (size == 0) {
return -4;
}
//插入数据库
int number = 200000;//每段元素数量
if (size > number) {//分段上传
int excess = size % number;//余数
int i = (size + excess) / number;//倍数
int start = 0;//开始行
List<MarksetData> dataList = new ArrayList<>();
for (int j = 0; j <= i; j++) {
start = number * j;
if (j < i) {
dataList = list.subList(start, start + number);
} else {
dataList = list.subList(start, start + excess);
}
if (dataList.size() > 0) {
marksetDataMapper.addExcelList(dataList);
}
}
} else {//直接上传
marksetDataMapper.addExcelList(list);
}
QueryWrapper<MarksetData> wrapper = new QueryWrapper<>();
wrapper.eq("markset_id", marksetId);
wrapper.eq("del_status", "0");
Integer successNum = marksetDataMapper.selectCount(wrapper);
return successNum;
}
}
/**
* 处理csv文件类型
*/
private List<MarksetData> readCSV(String path, String charset, List<ExcelColumnParam> columnMaps, Integer marksetId) {
List<MarksetData> list = new ArrayList<>();
CsvReader csvReader = null;
try {
csvReader = new CsvReader(path, ',', Charset.forName(charset));
//跳过表头
csvReader.readHeaders();
while (csvReader.readRecord()) {
MarksetData base = new MarksetData();
for (ExcelColumnParam columnMap : columnMaps) {
Integer index = columnMap.getIndex();
String column = columnMap.getColumn();
if (index == null || StringUtils.isBlank(column)) {
continue;
}
String data = csvReader.get(index);
if (StringUtils.isBlank(data)) {
continue;
}
switch (column) {
case "内容":
base.setContent(data);
base.setContentHash(parse2md5(data) + ";" + marksetId);
base.setCreateTime(DateUtils.round(new Date(), Calendar.SECOND));
break;
}
if (StringUtils.isNotBlank(base.getContent())) {
list.add(base);
}
}
}
} catch (FileNotFoundException e) {
throw new CommonException(e.getMessage());
} catch (IOException e) {
throw new CommonException(e.getMessage());
} finally {
if (csvReader != null) {
csvReader.close();
}
}
return list;
}
/**
* 处理xlsx文件类型
*/
private List<MarksetData> readXLSX(String path, List<ExcelColumnParam> columnMaps, Integer marksetId) {
List<MarksetData> list = new ArrayList<>();
List<Row> rows = POIUtils.readBigExcel(path);
int size = rows.size();
if (rows != null && size > 0) {
for (Row row : rows) {
MarksetData base = new MarksetData();
for (ExcelColumnParam columnMap : columnMaps) {
Integer index = columnMap.getIndex();
String column = columnMap.getColumn();
if (index == null || StringUtils.isBlank(column)) {
continue;
}
String data = row.getCell(index).getStringCellValue();
if (StringUtils.isBlank(data)) {
continue;
}
switch (column) {
case "内容":
base.setContent(data);
base.setContentHash(parse2md5(data) + ";" + marksetId);
base.setCreateTime(DateUtils.round(new Date(), Calendar.SECOND));
break;
}
}
if (StringUtils.isNotBlank(base.getContent())) {
list.add(base);
}
}
}
return list;
}
/**
* 转化为md5
*/
public static String parse2md5(String content) {
if (content != null && content != "") {
String md5Password = DigestUtils.md5DigestAsHex(content.getBytes());
return md5Password;
}
return null;
}
}
3.工具类
POIUtils
/**
*在common.util中,POIUtils.java
**/
package com.common.utils;
import com.monitorjbl.xlsx.StreamingReader;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.springframework.web.multipart.MultipartFile;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
@Slf4j
public class POIUtils {
private final static String xls = "xls";
private final static String xlsx = "xlsx";
private final static String DATE_FORMAT = "yyyy/MM/dd";
public static List readBigFile(String filePath, Integer pageSize) {
List sheetData = new ArrayList<String>();
Workbook workbook = getBigWorkBook(filePath);
int sheetNums = workbook.getNumberOfSheets();
Integer num=0;
for (int i = 0; i < sheetNums; i++) {
Sheet sheet = workbook.getSheetAt(i);
if(sheet == null){
continue;
}
for (Row row : sheet) {
num++;
if (num==pageSize+2){
return sheetData;
}
List dataList=new ArrayList();
for (Cell cell : row) {
dataList.add(cell.getStringCellValue());
}
sheetData.add(dataList);
}
}
return sheetData;
}
public static List<Row> readBigExcel(String filePath){
List<Row> list=new ArrayList<>();
Workbook workbook = getBigWorkBook(filePath);
int sheetNums = workbook.getNumberOfSheets();
for (int i = 0; i < sheetNums; i++) {
Sheet sheet = workbook.getSheetAt(i);
if(sheet == null){
continue;
}
Integer num=0;
for (Row row : sheet) {
num++;
if (num==1){
continue;
}
list.add(row);
}
}
return list;
}
public static List readExcel(String filePath,Integer pageSize) throws IOException {
Workbook workbook = getWorkBook(filePath);
List list = new ArrayList();
int numberOfSheets = workbook.getNumberOfSheets();
if(workbook != null){
Integer num=0;
for(int sheetNum = 0;sheetNum < numberOfSheets;sheetNum++){
Sheet sheet = workbook.getSheetAt(sheetNum);
if(sheet == null){
continue;
}
int firstRowNum = sheet.getFirstRowNum();
int lastRowNum = sheet.getLastRowNum();
for(int rowNum = firstRowNum;rowNum <= lastRowNum;rowNum++){
num++;
if (num==pageSize+2){
return list;
}
Row row = sheet.getRow(rowNum);
if(row == null){
continue;
}
Iterator<Cell> cells = row.cellIterator();
List dataList=new ArrayList();
while (cells.hasNext()){
Cell next = cells.next();
dataList.add(next.toString());
}
list.add(dataList);
}
}
workbook.close();
}
return list;
}
public static List<Row> readExcel(String filePath) throws IOException {
Workbook workbook = getWorkBook(filePath);
List<Row> list=new ArrayList<>();
int numberOfSheets = workbook.getNumberOfSheets();
if(workbook != null){
Integer num=0;
for(int sheetNum = 0;sheetNum < numberOfSheets;sheetNum++){
Sheet sheet = workbook.getSheetAt(sheetNum);
if(sheet == null){
continue;
}
int firstRowNum = sheet.getFirstRowNum();
int lastRowNum = sheet.getLastRowNum();
for(int rowNum = firstRowNum+1;rowNum <= lastRowNum;rowNum++){
Row row = sheet.getRow(rowNum);
if(row == null){
continue;
}
list.add(row);
}
}
workbook.close();
}
return list;
}
public static void checkFile(MultipartFile file) throws IOException{
if(null == file){
throw new FileNotFoundException("文件不存在!");
}
String fileName = file.getOriginalFilename();
if(!fileName.endsWith(xls) && !fileName.endsWith(xlsx)){
throw new IOException(fileName + "不是excel文件");
}
}
public static Workbook getBigWorkBook(String filePath) {
Workbook workbook = null;
FileInputStream is=null;
try {
is = new FileInputStream(filePath);
workbook = StreamingReader.builder()
.rowCacheSize(100)
.bufferSize(4096)
.open(is);
} catch (IOException e) {
e.printStackTrace();
}finally {
if (is!=null){
try {
is.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return workbook;
}
public static Workbook getWorkBook(String filePath) {
Workbook workbook = null;
FileInputStream is=null;
try {
is = new FileInputStream(filePath);
if(filePath.endsWith(xls)){
workbook = new HSSFWorkbook(is);
}else if(filePath.endsWith(xlsx)){
workbook = new XSSFWorkbook(is);
}
} catch (IOException e) {
e.printStackTrace();
}finally {
if (is!=null){
try {
is.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return workbook;
}
public static String getCellValue(Cell cell){
String cellValue = "";
if(cell == null){
return cellValue;
}
String dataFormatString = cell.getCellStyle().getDataFormatString();
if(dataFormatString.equals("m/d/yy")){
cellValue = new SimpleDateFormat(DATE_FORMAT).format(cell.getDateCellValue());
return cellValue;//"2019/10/10"
}
if(cell.getCellType() == Cell.CELL_TYPE_NUMERIC){
cell.setCellType(Cell.CELL_TYPE_STRING);
}
switch (cell.getCellType()){
case Cell.CELL_TYPE_NUMERIC: //数字
cellValue = String.valueOf(cell.getNumericCellValue());
break;
case Cell.CELL_TYPE_STRING: //字符串
cellValue = String.valueOf(cell.getStringCellValue());
break;
case Cell.CELL_TYPE_BOOLEAN: //Boolean
cellValue = String.valueOf(cell.getBooleanCellValue());
break;
case Cell.CELL_TYPE_FORMULA: //公式
cellValue = String.valueOf(cell.getCellFormula());
break;
case Cell.CELL_TYPE_BLANK: //空值
cellValue = "";
break;
case Cell.CELL_TYPE_ERROR: //故障
cellValue = "非法字符";
break;
default:
cellValue = "未知类型";
break;
}
return cellValue;
}
}
FileUtil
/**
*在common.util中,FileUtil.java
**/
package com.common.utils;
import info.monitorenter.cpdetector.io.*;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Properties;
public class FileUtil {
public static String getByPath(String path) {
Properties props = System.getProperties();
String property = props.getProperty("os.name");
String userHomePath = props.getProperty("user.home");
String filePath = "";//文件存放地址
if (property.contains("Windows")) {
String[] arr = userHomePath.split(":");
String pan = arr[0] + ":";
filePath = pan + path;
} else if (property.contains("Linux")) {
filePath = path;
}
return filePath;
}
public static String getFileEncode(String path) throws IOException {
CodepageDetectorProxy detector = CodepageDetectorProxy.getInstance();
detector.add(new ParsingDetector(false));
detector.add(JChardetFacade.getInstance());
detector.add(ASCIIDetector.getInstance());
detector.add(UnicodeDetector.getInstance());
File f = new File(path);
Charset charset = detector.detectCodepage(f.toURI().toURL());
if (charset != null) {
return charset.name();
} else {
return null;
}
}
}
AuditFileParam和ExcelColumnParam
@Data
public class AuditFileParam {
@ApiModelProperty(value = "文件id")
private Integer fileId;
@ApiModelProperty(value = "映射关系")
private List<ExcelColumnParam> list;
}
@Data
private class ExcelColumnParam {
@ApiModelProperty(value = "索引(从0开始)", required = true)
private Integer index;
@ApiModelProperty(value = "映射字段名称", required = true)
private String column;
}