1、记录一下解决问题的思路,时间比较紧,直接上代码,逻辑处理比较多,直接看main方法就行,后期再补充完善一下这篇文章
package com.ruoyi.project.shop.util;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.sql.*;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
public class XLSX2CSV {
private List<String[]> rows = new ArrayList<String[]>();
private final OPCPackage xlsxPackage;
/**
* Number of columns to read starting with leftmost
*/
private int minColumns;
/**
* Destination for data
*/
private class SheetToCSV implements SheetContentsHandler {
private String[] record;
private int minColumns;
private int thisColumn = 0;
public SheetToCSV(int minColumns) {
super();
this.minColumns = minColumns;
}
@Override
public void startRow(int rowNum) {
record=new String[this.minColumns];
// System.out.println("################################:"+rowNum);
}
@Override
public void endRow(int rowNum) {
thisColumn=0;
rows.add(this.record);
}
@Override
public void cell(String cellReference, String formattedValue, XSSFComment comment) {
if(thisColumn<this.minColumns)
record[thisColumn]=formattedValue;
thisColumn++;
}
@Override
public void headerFooter(String text, boolean isHeader, String tagName) {
}
}
/**
* Creates a new XLSX -> CSV converter
*
* @param pkg The XLSX package to process
* @param output The PrintStream to output the CSV to
* @param minColumns The minimum number of columns to output, or -1 for no minimum
*/
public XLSX2CSV(OPCPackage pkg, int minColumns) {
this.xlsxPackage = pkg;
this.minColumns = minColumns;
}
/**
* Parses and shows the content of one sheet
* using the specified styles and shared-strings tables.
*
* @param styles
* @param strings
* @param sheetInputStream
*/
public void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetHandler,InputStream sheetInputStream)
throws IOException, ParserConfigurationException, SAXException {
DataFormatter formatter = new DataFormatter();
InputSource sheetSource = new InputSource(sheetInputStream);
try {
XMLReader sheetParser = SAXHelper.newXMLReader();
ContentHandler handler = new XSSFSheetXMLHandler(styles, null, strings, sheetHandler, formatter, false);
sheetParser.setContentHandler(handler);
sheetParser.parse(sheetSource);
} catch (ParserConfigurationException e) {
throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
}
}
/**
* Initiates the processing of the XLS workbook file to CSV.
*
* @throws IOException
* @throws OpenXML4JException
* @throws ParserConfigurationException
* @throws SAXException
*/
public List<String[]> process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
StylesTable styles = xssfReader.getStylesTable();
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
int index = 0;
while (iter.hasNext()) {
InputStream stream = iter.next();
String sheetName = iter.getSheetName();
//this.output.println();
//this.output.println(sheetName + " [index=" + index + "]:");
processSheet(styles, strings, new SheetToCSV(this.minColumns), stream);
stream.close();
++index;
}
return this.rows;
}
/**
* 得到excel的记录
* @param excelPath
* @param minColumns 输出多少列
* @return
* @throws Exception
*/
public static List<String[]> getRecords(String excelPath,int minColumns) throws Exception{
File xlsxFile = new File(excelPath);
if (!xlsxFile.exists()) {
System.err.println("Not found or not a file: " + xlsxFile.getPath());
return null;
}
// The package open is instantaneous, as it should be.
OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ);
XLSX2CSV xlsx2csv = new XLSX2CSV(p, minColumns);
List<String[]>list=xlsx2csv.process();
p.close();
return list;
}
/**
*
* @param args
* @throws Exception
* /**
*/
public static void main(String[] args) throws Exception {
long aa = System.currentTimeMillis();
List<String[]> list =getRecords("C:\\Users\\Administrator\\Desktop\\aa.xlsx",7);
System.out.println(list.size());
long bb = System.currentTimeMillis();
System.out.println("数据读取所用的时间"+(bb-aa));
/**
* 数据库操作 采用事务加手动提交的方式,效率为69w的数据最多耗时187s
*/
String url = "jdbc:mysql://127.0.0.1:3306/fast_motion?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&useSSL=true&serverTimezone=GMT%2B8";
String user = "root";
String password = "root";
Connection conn = null;
PreparedStatement pstm =null;
ResultSet rt = null;
try {
Class.forName("com.mysql.cj.jdbc.Driver");
conn = DriverManager.getConnection(url, user, password);
long aa1 = System.currentTimeMillis();
String sql = "INSERT INTO `fast_motion`.`fast_shose` ( `shose_name`, `shose_no`, `shose_price`, `shose_discount_price`, `shose_img`, `creaet_time`, `shose_size`, `shose_num`, `shose_stock`, `onhead`, `intransit`, `updata_time`) VALUES ";
// String sql = "INSERT INTO `fast_motion`.`fast_shose` (`shose_id`, `shose_name`, `shose_no`, `shose_price`, `shose_discount_price`, `shose_img`, `shose_change`, `shose_change_state`, `creaet_time`, `user_id`, `user_name`, `buy_and_sell`, `out_time`, `shose_size`, `shose_num`, `shose_stock`, `status`, `onhead`, `intransit`, `updata_time`, `yuliu`, `yuliu2`, `yuliu3`, `yuliu4`) VALUES (?, ?, ?, ?, ?, ?,?,?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);";
pstm = conn.prepareStatement(sql);
Long startTime = System.currentTimeMillis();
conn.setAutoCommit(false); //开启手动提交
StringBuffer canshu = new StringBuffer(); //多次修改String
int num=0;
for ( String a[]: list) {
if(num!=0){
num++;
//sql拼接,参数封装
canshu.append("("+"'暂时没有','"+a[0]+"','"+a[2]+"','"+a[3]+"',"+"'https://ss3.bdstatic.com/70cFv8Sh_Q1YnxGkpoWK1HF6hhy/it/u=2553590465,1136338498&fm=26&gp=0.jpg'"+","
+"SYSDATE()"+",'"+a[1]+"',"+null+",'"+a[4]+"','"+a[5]+"','"+a[6]+"',"+"SYSDATE()"+"),");
if(num>100000){
String sql2 = sql + canshu.substring(0, canshu.length()-1); //构建完整的sql
pstm.addBatch(sql2); //添加sql到缓存
System.out.println("提交了一次数据");
pstm.executeBatch(); //执行sql
conn.commit(); //提交
canshu= new StringBuffer();
num=0;
}
}else{
num++;
}
}
//提交剩余不足50000的数据
String sql2 = sql + canshu.substring(0, canshu.length()-1);
pstm.addBatch(sql2);
pstm.executeBatch();
conn.commit();
Long endTime = System.currentTimeMillis();
System.out.println("用时:" + (endTime - startTime));
} catch (Exception e) {
e.printStackTrace();
throw new RuntimeException(e);
}finally{
if(pstm!=null){
try {
pstm.close();
} catch (SQLException e) {
e.printStackTrace();
throw new RuntimeException(e);
}
}
if(conn!=null){
try {
conn.close();
} catch (SQLException e) {
e.printStackTrace();
throw new RuntimeException(e);
}
}
}
}
}
一般的注释都在上面,需要的可以看看,文件处理的内容在下面,亲测的,没问题,可以处理20M以上的excel文件,60w条数据一次性插入,用的是jdbc的事务加缓存批量拼接的sql,单条sql+手动提交处理60w的数据时间上是180s左右,事务缓存+拼接sql 60w的数据处理是42s左右,可能还可以优化,我是本地测试的,服务器速度可能更快点,我还没测试,后期补充一下。