关于使用JDBC的API进行批量操作的优化

最新推荐文章于 2022-05-02 11:38:35 发布

xjh2011xjh

最新推荐文章于 2022-05-02 11:38:35 发布

阅读量1.3k

点赞数

分类专栏： Java 文章标签： Java

本文链接：https://blog.csdn.net/xjh2011xjh/article/details/39988665

版权

Java 专栏收录该内容

42 篇文章 1 订阅

订阅专栏

一、引言

在使用JDBC的API对数据进行批量操作的时候，我们经常遇到一个问题：当某一批次中的一行数据出现问题时（或者说一行中某个字段有错误而抛出异常），该如何处理？

处理方式通常可归为以下两种方式：

方式1. 将异常捕获，并且让事务回滚，使全部的的批操作无效，然后简单的告诉用户，批操作失败？

方式2. 只是将本次的批操作发生的异常捕获，不让事务回滚，继续执行后序的批操作？

如果是用方式1来处理批量操作的异常，这固然对开发者来说简单明了，但对于用户来说是不够人性化的。方式1的处理方式，好似把程序的错误抛给用户来处理似的，似乎有点不负责任的态度。

假如这些批量的数据来自文件当中（假设是一个excel文件当中），数据达到成千上万条数据量，其中只是某个字段写错，在进行批量操作的时候出现了异常，如果用方式1处理，

告诉用户批量操作失败，让用户重新检查文件的每一行每一列，可想而知，用户是不是疯了？

好，那就用方式2来处理，考虑到方式1对于异常的处理方式不够人性化，所以我们在方式2中提出以下改进：

1. 对于某一批次某一行（或多行）的数据出错，我们需要得到这一行（或多行）的数据和错误信息，以便告知用户是哪一行出错了，错在哪里。

2. 对于某一批次某一行（或多行）的数据出错，我们不应该让这一行（或多行）出错数据影响到其他正确行的执行，也就是说遇到出错数据，执行需要继续，要把正确的行正常入库。

二、解决思路

为了解决以上两个问题，我自己经过思考，提出了以下思路：

1.对于某一批次某一行（或多行）的数据出错，要得到这一行（或多行）的数据和错误信息，对应建立一个存储这些出错信息的实体类FileImportErrorDTO

/**
 * 
 * 功能：文件导入出错信息DTO
 * 创建时间：2014-10-10 上午10:15:42
 * 修改时间：
 * 作者：xujianhua
 */
public class FileImportErrorDTO {
	private int rowNumber;			//行号
	private String[] rowContent;	        //行内容
	private String errorMsg;		//错误信息
	
	public FileImportErrorDTO() {
		super();
	}
	public FileImportErrorDTO(int rowNumber, String[] rowContent,
			String errorMsg) {
		super();
		this.rowNumber = rowNumber;
		this.rowContent = rowContent;
		this.errorMsg = errorMsg;
	}
	public int getRowNumber() {
		return rowNumber;
	}
	public void setRowNumber(int rowNumber) {
		this.rowNumber = rowNumber;
	}
	public String[] getRowContent() {
		return rowContent;
	}
	public void setRowContent(String[] rowContent) {
		this.rowContent = rowContent;
	}
	public String getErrorMsg() {
		return errorMsg;
	}
	public void setErrorMsg(String errorMsg) {
		this.errorMsg = errorMsg;
	}
	
}

2. 由于使用的是JDBC的API，涉及到PreparedStatement的参数设置问题，为了让使用者更加灵活的设置PreParedStament中参数（通常设置PreparedStatement的sql参数是比较复杂多变的所以一般不会有固定写法），我这里设计了一个接口ParameterToPreparedStatment，让使用者自己实现将参数设置进PreparedStatement这步。

import java.sql.PreparedStatement;

/**
 * 
 * 功能：将二维数组中的数据设置进PreparedStatement中，该接口与BatchImportUtil搭配使用
 * 创建时间：2014-10-10 上午10:11:58
 * 修改时间：
 * 作者：xujianhua
 */
public interface ParameterToPreparedStatement {
	public void paramToPreState(PreparedStatement preStat, String[][] allDatasStrArr , int row) throws Exception;
}

3. 进行批量操作的工具类BatchImportUtil，是真正完成批量入库的类。
这个类的进行批量操作的方法，在某一批次遇到到异常时，会记录这一批次的出错行，并且跳过出错行继续往下执行。

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;

import javax.servlet.http.HttpServletRequest;


/**
 * 
 * 功能：批量导入工具类
 * 创建时间：2014-10-10 上午10:17:09
 * 修改时间：
 * 作者：xujianhua
 */
public class BatchImportUtil {
	
	//该接口只有一个抽象方法，目的：把参数设置进入PreparedStatement
	private ParameterToPreparedStatement ptp; 
        //默认10条数据作为一批
        private static final int DEFAULT_BATCH_COUNTS = 10; 
	
	public BatchImportUtil(ParameterToPreparedStatement ptp) {
		this.ptp = ptp;
	}

	/**
	 * 
	 * 功能：文件数据批量进入数据库。
	 * 注意：
	 * 	   1.二维数组allDatasStrArr第一维的索引值与导入文件数据行的行号要相对应
	 * 	   2.导入过程中若出错，本方法将会把出错信息写入出错日志文件中，出错日志文件的名称格式为：fileName + "_" + "importErrorLog.txt"
	 * 	              下载路径为： 应用根目录 + "/downloadFile/" + fileName + "importErrorLog.txt"
	 * 创建时间：2014-10-10 上午10:36:06
	 * 修改时间：
	 * 作者：xujianhua
	 * @param fileName 文件名称（包含后缀名，如：test.xls）
	 * @param sql	         需要执行的sql
	 * @param allDatasStrArr  文件内容封装在这个二维字符串数组中，需要注意的是：sql的占位符（字段值）与这个数组的第二维要一一对应
	 * @param oneBatchRecords 一批执行多少条记录
	 * @return ResponseEntity
	 */
	public ResponseEntity fileDataToDB(String fileName, String sql, String[][] allDatasStrArr, int oneBatchRecords){

		//resEntity 是一个封装批操作的结果信息（批操作执行的成功数和出错数、以及出错提示等）的类实例（这里不是重点）
		ResponseEntity resEntity = new ResponseEntity();
		
		Connection conn = null;
		PreparedStatement preStat = null;
		
		try{
			conn = DBUtil.getConnection();         //DBUtil是一个可以获取数据库连接的工具类（这里不是重点）
			preStat = conn.prepareStatement(sql);
		}catch(Exception e){
			System.out.println("获取数据库连接失败！");
			throw new RuntimeException(e);
		}

///以下部分为本文讨论的重点//
		
		int rows = allDatasStrArr.length;	//总行数
		int columns = allDatasStrArr[0].length; //总列数
		
		//一批多少条记录
		if(oneBatchRecords <= 0){
			oneBatchRecords = DEFAULT_BATCH_COUNTS; //如果不指定就用默认的
		}
		//执行批量操作时是否有错误，默认无错误
		boolean executeBatchHaveError = false;
		//异常信息
		String errorMsg = "";
		//批量操作执行完成后返回的错误集合
		List<FileImportErrorDTO> errorList = new ArrayList<FileImportErrorDTO>(); 
		//记录每一批出错的索引，这个索引总是整除oneBatchRecords一批的记录数
		List<Integer> errorIndexList = new LinkedList<Integer>();
		//每一批的起始索引
		int batchBeginIndex = 2;
		
		//实际数据的行数，因为第一行（allDatasStrArr[0]）是标题信息，不是数据内容
		int actualRows = rows - 1;  
		
		try{
			//设置自己控制事务
			conn.setAutoCommit(false);
			//执行批操作
			for(int row = 1; row <= actualRows; row++){
				try{
					//设置参数
					this.ptp.paramToPreState(preStat, allDatasStrArr, row);
					//加入批次中
					preStat.addBatch();
					executeBatchHaveError = false;
				}catch(Exception e){
					e.printStackTrace(); //运营阶段可以去掉
					//如果期间遇到了错误
					executeBatchHaveError = true;
					//记录错误
					errorMsg = e.getMessage();
				}
				if(executeBatchHaveError){
					//存入记录错误的集合中
					FileImportErrorDTO errorDTO = new FileImportErrorDTO(
							row + 1, allDatasStrArr[row] , errorMsg);//这里row+1 表示的是出错所在行（excel中的行）
					errorList.add(errorDTO);  
					continue;
				}
				
				//计算数量是否达到一批了（每oneBatchRecords条执行一次）
				if( row % oneBatchRecords == 0 ){ 
					try{
						//执行批量操作
						preStat.executeBatch(); //运营阶段可以去掉
						executeBatchHaveError = false;
					}catch (SQLException e) {
						e.printStackTrace();
						executeBatchHaveError = true;
						errorMsg = e.getMessage();
					}
					
					if(executeBatchHaveError){ //如果有出错的
						
						//这次批次执行后的结果，这个值代表：成功插入了前面executeResult条数据
						int executedSuccessCounts = preStat.getUpdateCount();
						
						if(errorIndexList.contains(row)){ //如果是同批次中的错误
							//得到那条出错的记录的下一条所在位置(同批次里面出现多于2个错误)
							batchBeginIndex = batchBeginIndex + executedSuccessCounts + 1;
						}else{
							//加入出错的索引容器中
							errorIndexList.add(row);
							//得到那条出错的记录的下一条所在位置
							batchBeginIndex = (row - oneBatchRecords + 1 + executedSuccessCounts) + 1;
						}
						
						//存入记录错误的集合中
						FileImportErrorDTO errorDTO = new FileImportErrorDTO(
							batchBeginIndex, allDatasStrArr[batchBeginIndex -1] , errorMsg);//这里batchBeginIndex表示的是出错所在行（excel中的行）
						errorList.add(errorDTO);  
						//重新定位index, 从发生错误的下一条，执行批量
						row = batchBeginIndex - 1; //因为每次for循环index都会加1所以，这里需要-1抵消
					}
					
					//清除上一次里面的sql信息
					preStat.clearBatch();
				}
				
			}
			
			//记录最后一批执行过程中是否有错
			boolean lastBatchHasError = false;
			try{
				//执行剩下的
				preStat.executeBatch();
				lastBatchHasError = false;
			}catch (SQLException e) {
				e.printStackTrace(); //运营阶段可以去掉
				lastBatchHasError = true;
				errorMsg = e.getMessage();
			}
			
			//判断是否有错误（如果最后一批的sql数量为0那么是不可能有错误的）
			if(lastBatchHasError){ //如果有错误
				
				//看这最后一批的数目(即余数)
				int remainder = actualRows % oneBatchRecords;
				
				//如果最后一批只有一条
				if(remainder == 1){
					//那么只要加入错误集合中便可
					FileImportErrorDTO errorDTO = new FileImportErrorDTO(//这里batchBeginIndex表示的是出错所在行（excel中的行）
						batchBeginIndex + 1, allDatasStrArr[batchBeginIndex] , errorMsg); //数组的中的错误行索引总是比excel行小1
					errorList.add(errorDTO);  
				}
				else{ //如果最后一批不止一条记录
					
					//得到成功的记录数
					int remainExecutedSuccessCounts = preStat.getUpdateCount();
					
					//默认起始索引，从:总数-余数+1 开始 + 1(跳过出错的)
					int beginIndex = actualRows - remainder + 1 + 1;  
					
					//跳过那些： 前面成功的  + 出错的
					beginIndex = remainExecutedSuccessCounts + beginIndex; 
					
					//把错误的加入错误集合
					FileImportErrorDTO errorDTO = new FileImportErrorDTO(//这里batchBeginIndex表示的是出错所在行（excel中的行）
							beginIndex, allDatasStrArr[beginIndex - 1] , errorMsg); //数组的中的错误行索引总是比excel行小1
					errorList.add(errorDTO);  
					
					//清空之前的批次sql
					preStat.clearBatch();
					
					do{
						//继续执行剩下的
						for(int row = beginIndex; row <= actualRows; row++){ //这里数组索引从1开始则用<=号
							try{
								//设置参数
								this.ptp.paramToPreState(preStat, allDatasStrArr, row);
								preStat.addBatch();
								lastBatchHasError = false;
							}catch(Exception e){
								e.printStackTrace(); //运营阶段可以去掉
								//如果期间遇到了错误
								lastBatchHasError = true;
								//记录错误
								errorMsg = e.getMessage();
							}
							if(lastBatchHasError){
								//存入记录错误的集合中
								FileImportErrorDTO errorDTOTemp = new FileImportErrorDTO(//这里batchBeginIndex表示的是出错所在行（excel中的行）
									batchBeginIndex, allDatasStrArr[batchBeginIndex - 1] , errorMsg); //数组的中的错误行索引总是比excel行小1
								errorList.add(errorDTOTemp);  
								continue;
							}
						}
						
						try{
							//执行剩下的
							preStat.executeBatch();
							lastBatchHasError = false;
						}catch (SQLException e) {
							e.printStackTrace(); //运营阶段可以去掉
							lastBatchHasError = true;
							errorMsg = e.getMessage();
						}
						
						if(lastBatchHasError){
							//得到成功的记录数
							int executedSuccessCounts = preStat.getUpdateCount();
							
							//跳过： 前面成功的+出错的
							beginIndex = beginIndex + executedSuccessCounts + 1; 
							
							//把错误的加入错误集合
							FileImportErrorDTO errorDTOTemp = new FileImportErrorDTO(//这里batchBeginIndex表示的是出错所在行（excel中的行）
								beginIndex, allDatasStrArr[beginIndex - 1] , errorMsg); //数组的中的错误行索引总是比excel行小1
							errorList.add(errorDTOTemp);  
							
							//清空之前的批次sql
							preStat.clearBatch();
						}
						
					}while(lastBatchHasError);
				}
				
			}
			
			//提交事务
			conn.commit();
			
		}catch(SQLException e){
			e.printStackTrace();
			try {
				//事务回滚
				conn.rollback();
			} catch (SQLException sqle) {
				sqle.printStackTrace();
			}

                        //ResponseEntity.RESPONSE_TYPE_ERROR表示返回给客户端的是出错的信息
			resEntity.setType(ResponseEntity.RESPONSE_TYPE_ERROR);
			resEntity.setMessage("对不起，文件导入出错了！");
			return resEntity;
			
		}finally{ //释放资源
			try {
				if(preStat != null)
					preStat.close();
				if(conn != null)
					conn.close();
			} catch (SQLException e) {
				e.printStackTrace();
			}
		}

 ///以上部分为本文讨论的重点//

		//得到错误信息数量
		int errorRowCounts = errorList.size();
		//得到成功的行数
		int successRowCounts = allDatasStrArr.length - errorRowCounts -1 ;
		//提示信息
		String tips = "";
		
		//判断导入过程是否有出错
		if(errorRowCounts == 0){
			tips = "导入" + fileName + "完成, 成功导入" + successRowCounts + "条数据, 没有导入失败的数据。";
			resEntity.setType(ResponseEntity.RESPONSE_TYPE_INFO);
		}else{
			tips = "导入" + fileName + "完成, 成功导入" + successRowCounts + "条数据, 导入失败的数据有" + errorRowCounts + "条" +
					"，请查看错误日志文档！";
			//提示客户端有警告信息
			resEntity.setType(ResponseEntity.RESPONSE_TYPE_WARN);
		}
		
		//设置错误提示信息
		resEntity.setMessage(tips);

		//得到错误文件的路径
		HttpServletRequest request = FlexContext.getHttpRequest();
		String errorLogInforFilePath = request.getSession().getServletContext().getRealPath("/") +
									"downloadFile/" + fileName + "_" + "importErrorLog.txt";
		//错误文件的包装类
		File errorLogInforFile = new File(errorLogInforFilePath);
		
		try{
		
			//如果文件不存在就创建
			if( !errorLogInforFile.exists() ){
				errorLogInforFile.createNewFile();
			}
			
			//将错误提示信息写入文件
			FileOutputStream fileOutputStream = new FileOutputStream(errorLogInforFile);
			
			//将错误信息写入文件
			for(FileImportErrorDTO errorDTO:errorList){
				String[] errorRow = errorDTO.getRowContent();
				String msg = errorDTO.getErrorMsg(); 
				StringBuilder errorBuilder = new StringBuilder("第" + errorDTO.getRowNumber() + "行，[");
				for(int index = 0; index < columns; index++){
					errorBuilder.append(errorRow[index] + ", ");
				}
				errorBuilder.delete(errorBuilder.lastIndexOf(","), errorBuilder.length()); //去掉多余的,
				errorBuilder.append("], 存在错误：" + msg + "\r\n");
				fileOutputStream.write(errorBuilder.toString().getBytes());
			}
			Date date = new Date();
			SimpleDateFormat sf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS");
			String timeStr = "出错时间：" + sf.format(date) + "\r\n";
			fileOutputStream.write(timeStr.getBytes());
			fileOutputStream.close();
			
		}catch(IOException ioe){
			ioe.printStackTrace();
			System.out.println("出错日志文件写入异常！");
		}
		return resEntity;
	}
}

4. 如何使用以上3个类。

//sql语句
String sql =  "insert into tableName(" +
"column1, column2, column3, column4, column5, column6, column7, column8)" +
"values (?, ?, ?, ?, ?, ?, ?, ?)";


//批量导入工具类
BatchImportUtil importUtil = new BatchImportUtil(
    new ParameterToPreparedStatement(){
       @Override
       public void paramToPreState(PreparedStatement preStat, String[][] allDatasStrArr , int row) throws Exception{
            //这里把放入preStat的sql（上面的sql）的?(占位符)，用二维字符串数组的第row行的数据进行填充。
            //如：preStat.setObject(1, allDatasStrArr[row][0]);
       }
    }
);


/*
excelFileName为需要批量数据导入的excel文件名称，
allSheetStrArr表示从excel文件读出的数据存储在这个二维字符串数组中，
10表示每一批次执行10条数据
*/
resEntity = importUtil.fileDataToDB(excelFileName, sql, allSheetStrArr, 10);