处理百万级Excel/CSV/TXT杂乱数据、提取无重复手机号(mysql+jdbc+poi)

maven工程的pom文件,引入mysql、:

<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
		<dependency>
		    <groupId>org.apache.poi</groupId>
		    <artifactId>poi</artifactId>
		    <version>3.17</version>
		</dependency>
		<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
		<dependency>
		    <groupId>org.apache.poi</groupId>
		    <artifactId>poi-ooxml</artifactId>
		    <version>3.17</version>
		</dependency>
		
		
		<!-- https://mvnrepository.com/artifact/mysql/mysql-connector-java -->
		<dependency>
		    <groupId>mysql</groupId>
		    <artifactId>mysql-connector-java</artifactId>
		    <version>6.0.6</version>
		</dependency>

首先是DButil连接MySQL工具类:

package com.lpz.excel.mysql.test3ok;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;

/**
 * 
 * 
 * @author lpz
 *
 */
public class DButil {
	/*
	 * 打开数据库
	 */
	private static String driver;// 连接数据库的驱动
	private static String url;
	private static String username;
	private static String password;
	private static String dbip = "172.28.16.64";
//	private static String dbname = "testdb";
	private static String dbname = "testdb2";

	static {
//		driver = "com.mysql.jdbc.Driver";// 需要的数据库驱动
		driver = "com.mysql.cj.jdbc.Driver";// 需要的数据库驱动
		url = "jdbc:mysql://" + dbip + ":3306/" + dbname + "?allowMultiQueries=true&autoReconnect=true&useUnicode=true&characterEncoding=UTF-8";// 数据库名路径
		username = "root";
		password = "123456";
	}

	public static Connection open() {
		try {
			Class.forName(driver);
			System.out.println("~~dbname:" + dbname);
			return (Connection) DriverManager.getConnection(url, username, password);
		} catch (Exception e) {
			System.out.println("数据库连接失败!");
			e.printStackTrace();
		} // 加载驱动
		return null;
	}

	/*
	 * 关闭数据库
	 */
	public static void close(Connection conn, PreparedStatement pstmt) {
		if (pstmt != null) {
			try {
				pstmt.close();
			} catch (SQLException e) {
				e.printStackTrace();
			}
		}
		if (conn != null) {
			try {
				conn.close();
			} catch (SQLException e) {
				e.printStackTrace();
			}
		}
	}
	
	/*
	 * 关闭数据库
	 */
	public static void close(Connection conn) {
		if (conn != null) {
			try {
				conn.close();
			} catch (SQLException e) {
				e.printStackTrace();
			}
		}
	}

}

使用JDBC查询操作数据库:

使用了批量操作,其中:prepareStatement会先初始化SQL,先把这个SQL提交到数据库中进行预处理,支持批处理,可以绑动态定变量,多次使用可提高效率。createStatement不会初始化,没有预处理,没次都是从0开始执行SQL。

package com.lpz.excel.mysql.test3ok;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;


/**
 *   (转)今天在做一个将excel数据导入数据库的程序时,由于数据量大,准备采用jdbc的批量插入。于是用了preparedStatement.addBatch();
 *   当加入1w条数据时,再执行插入操作,preparedStatement.executeBatch()。
 *   我原以为这样会很快,结果插入65536条数据一共花30多分钟,完全出乎我的意料。于是问了一下同事,他们在处理这种大批量数据导入的时候是如何处理的,发现他们也是用的jdbc批量插入处理,
 *   但与我不同是:他们使用了con.setAutoCommit(false);然后再preparedStatement.executeBatch()之后,再执行con.commit();
 *   于是再试,什么叫奇迹?就是刚刚导入这些数据花了半小时,而加了这两句话之后,现在只用了15秒钟就完成了。于是去查查了原因,在网上发现了如下一段说明:

    * When importing data into InnoDB, make sure that MySQL does not have autocommit mode enabled 
    * because that requires a log flush to disk for every insert. 
    * To disable autocommit during your import operation, surround it with SET autocommit and COMMIT statements:
    
      SET autocommit=0;
     ... SQL import statements ...
     COMMIT;

  *  第一次,正是因为没有setAutoCommit(false);那么对于每一条insert语句,都会产生一条log写入磁盘,所以虽然设置了批量插入,但其效果就像单条插入一样,导致插入速度十分缓慢。
  *   
 * @author lpz
 *
 */
public class JdbcConnection {
	
	// 分批插入
	public static int segmentSize = 10000;

	public static void main(String[] args) {
		insert("yangxu", "yangxu@qq.com");
	}

	/**
	 * 
	 * @param name
	 * @param email
	 */
	public static void insert(String name, String email) {
		String sql = "insert into Haige(name,email) value(?,?)";
		Connection conn = DButil.open();
		try {
			PreparedStatement pstmt = (PreparedStatement) conn.prepareStatement(sql);
			pstmt.setString(1, name);
			pstmt.setString(2, email);
			pstmt.executeUpdate();
		} catch (SQLException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} finally {
			DButil.close(conn);
		}
	}

	/**
	 * 
	 * @param phone
	 */
//	public static void insertUser(String phone) {
//		String sql = "insert into user(phone) value(?)";
//		Connection conn = DButil.open();
//		try {
//			PreparedStatement pstmt = (PreparedStatement) conn.prepareStatement(sql);
//			pstmt.setString(1, phone);
//			pstmt.executeUpdate();
//		} catch (SQLException e) {
//			e.printStackTrace();
//		} finally {
//			DButil.close(conn);
//		}
//	}

	/**
	 * 
	 * @param phone
	 */
//	public static void insertUserNB(String phone) {
//		String sql = "insert into usernb(phone) value(?)";
//		Connection conn = DButil.open();
//		try {
//			PreparedStatement pstmt = (PreparedStatement) conn.prepareStatement(sql);
//			pstmt.setString(1, phone);
//			pstmt.executeUpdate();
//		} catch (SQLException e) {
//			e.printStackTrace();
//		} finally {
//			DButil.close(conn);
//		}
//	}
	
	/**
	 * 
	 * @param phone
	 */
	public static void insertDB(String phone, String dbName) {
		 
		System.out.println("insertDB. phone:" + phone + ", dbName:" + dbName);
		
//		String sql = "insert into usernb(phone) value(?)";
		String sql = "insert into " + dbName + "(phone) value(?)";
		Connection conn = DButil.open();
		try {
			PreparedStatement pstmt = (PreparedStatement) conn.prepareStatement(sql);
			pstmt.setString(1, phone);
			pstmt.executeUpdate();
		} catch (SQLException e) {
			e.printStackTrace();
		} finally {
			DButil.close(conn);
		}
	}

	/**
	 * 
	 * @param phoneList
	 */
//	public static void insertUserBatch(List<String> phoneList) {
//		String sql = "insert into user(phone) value(?)";
//		Connection conn = DButil.open();
//		PreparedStatement pstmt = null;
//		try {
//			// 关闭事务自动提交
//			conn.setAutoCommit(false);
//			pstmt = (PreparedStatement) conn.prepareStatement(sql);
//			
//			int num = phoneList.size();
//			System.out.printf("to insertUserBatch size: %s", num);
//			System.out.println();
//			
//			for (int i = 0; i < num; i++) {
//				// 把一个SQL命令加入命令列表
//				pstmt.setString(1, phoneList.get(i));
//				pstmt.addBatch();
//				// 1w条记录插入一次
//			    if (i != 0 && i % segmentSize == 0){
//			    	System.out.println("~~segmentation insertUserBatch: " + i);
//			         // 执行批量更新
//			         pstmt.executeBatch();
//			         // 语句执行完毕,提交本事务
//			         conn.commit();
//			     }
//			}
//			// 最后插入不足1w条的数据
//	        pstmt.executeBatch();
//	        conn.commit();
//	        
//		} catch (SQLException e) {
//			e.printStackTrace();
//		} finally {
//			DButil.close(conn, pstmt);
//		}
//	}
	
	/**
	 * 
	 * @param phoneList
	 */
	public static void updateBatch(List<User> userList, String dbName) {
		
		System.out.println("updateBatch size:::" + userList.size() + ", dbName:" + dbName);
		
//		String sql = "update usernb set phone = ? where id = ?";
		String sql = "update " + dbName + " set phone = ? where id = ?";
		Connection conn = DButil.open();
		PreparedStatement pstmt = null;
		try {
			// 关闭事务自动提交
			conn.setAutoCommit(false);
			pstmt = (PreparedStatement) conn.prepareStatement(sql);
			
			int num = userList.size();
			System.out.println("to updateBatch size: " + num);
			
			for (int i = 0; i < num; i++) {
				// 把一个SQL命令加入命令列表
				pstmt.setString(1, userList.get(i).getPhone());
				pstmt.setInt(2, userList.get(i).getId());
				pstmt.addBatch();
				// 1w条记录插入一次
			    if (i != 0 && i % segmentSize == 0){
			    	System.out.println("~~segmentation updateUserNBBatch: " + i);
			         // 执行批量更新
			         pstmt.executeBatch();
			         // 语句执行完毕,提交本事务
			         conn.commit();
			     }
			}
			// 最后插入不足1w条的数据
	        pstmt.executeBatch();
	        conn.commit();
	        
		} catch (SQLException e) {
			e.printStackTrace();
		} finally {
			DButil.close(conn, pstmt);
		}
	}

	/**
	 * 
	 * @param phoneList
	 */
//	public static void insertUserNBBatch(List<String> phoneList) {
//		String sql = "insert into usernb(phone) value(?)";
//		Connection conn = DButil.open();
//		PreparedStatement pstmt = null;
//		try {
//			// 关闭事务自动提交
//			conn.setAutoCommit(false);
//			pstmt = (PreparedStatement) conn.prepareStatement(sql);
//
//			int num = phoneList.size();
//			System.out.printf("to insertUserNBBatch size: %s", num);
//			System.out.println();
//			
//			for (int i = 0; i < num; i++) {
//				pstmt.setString(1, phoneList.get(i));
//				// 把一个SQL命令加入命令列表 
//				pstmt.addBatch();
//				// 1w条记录插入一次
//			    if (i != 0 && i % segmentSize == 0){
//			    	System.out.println("~~segmentation insertUserNBBatch: " + i);
//			         // 执行批量更新
//			         pstmt.executeBatch();
//			         // 语句执行完毕,提交本事务
//			         conn.commit();
//			     }
//			}
//			// 执行批量更新
//			pstmt.executeBatch();
//			// 语句执行完毕,提交本事务
//			conn.commit();
//			
//		} catch (SQLException e) {
//			e.printStackTrace();
//		} finally {
//			DButil.close(conn, pstmt);
//		}
//	}
	
	
	
	/**
	 * 
	 * @param phoneList
	 */
	public static void insertBatch(List<String> phoneList, String dbName) {
		
		System.out.println("insertBatch size:::" + phoneList.size() + ", dbName:" + dbName);
		
//		String sql = "insert into usernb(phone) value(?)";
		String sql = "insert into " + dbName + "(phone) value(?)";
		Connection conn = DButil.open();
		PreparedStatement pstmt = null;
		try {
			// 关闭事务自动提交
			conn.setAutoCommit(false);
			pstmt = (PreparedStatement) conn.prepareStatement(sql);

			int num = phoneList.size();
			System.out.printf("to insert %s batch size: %s", dbName, num);
			System.out.println();
			
			for (int i = 0; i < num; i++) {
				pstmt.setString(1, phoneList.get(i));
				// 把一个SQL命令加入命令列表 
				pstmt.addBatch();
				// 1w条记录插入一次
			    if (i != 0 && i % segmentSize == 0){
			    	System.out.println("~~segmentation insertBatch: " + i);
			         // 执行批量更新
			         pstmt.executeBatch();
			         // 语句执行完毕,提交本事务
			         conn.commit();
			     }
			}
			// 执行批量更新
			pstmt.executeBatch();
			// 语句执行完毕,提交本事务
			conn.commit();
			
		} catch (SQLException e) {
			e.printStackTrace();
		} finally {
			DButil.close(conn, pstmt);
		}
	}
	
	/**
	 * 
	 * @return
	 */
	public static List<User> queryUser(String dbName) {
		
		System.out.println("queryUserNB, dbName:" + dbName);
		
		Connection conn = DButil.open();
//		String sql = "select * from usernb";
		String sql = "select * from " + dbName;
		List<User> userList = new ArrayList<User>();
		try {
			Statement pstmt = conn.createStatement();
			ResultSet rs = pstmt.executeQuery(sql);
			while (rs.next()) {
				int id1 = rs.getInt(1);
				String phone = rs.getString(2);
				User user = new User();
				user.setId(id1);
				user.setPhone(phone);
				userList.add(user);
			}
			return userList;
		} catch (SQLException e) {
			e.printStackTrace();
		} finally {
			DButil.close(conn);
		}
		return null;
	}

}

补充一个JDBC的CRUD小例子:

package com.lpz.excel.mysql.test3.object;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

import com.lpz.excel.mysql.test3ok.DButil;

public class jdbcconnection {

	public static void main(String[] args) {
		// insert("yangxu","yangxu@qq.com");
		// Customer c=new Customer();
		 c.setName("zhangbing");
		 c.setEmail("zhangbing@qq.com");
		// //insert(c);
		// c.setId(1001);
		// c.setName("kaixin");
		// Update(c);
		// delete(1006);
		Customer c = query(1005);
		System.out.println(c.getId() + "," + c.getName() + "," + c.getEmail());
	}

	static void insert(Customer c) {
		String sql = "insert into Haige(name,email) value(?,?)";
		Connection conn = DButil.open();
		try {
			PreparedStatement pstmt = (PreparedStatement) conn.prepareStatement(sql);
			pstmt.setString(1, c.getName());
			pstmt.setString(2, c.getEmail());
			pstmt.executeUpdate();
		} catch (SQLException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} finally {
			DButil.close(conn);
		}

	}

	static void Update(Customer c) {
		String sql = "update haige set name=? where id=?";
		Connection conn = DButil.open();
		try {
			PreparedStatement pstmt = (PreparedStatement) conn.prepareStatement(sql);
			pstmt.setString(1, c.getName());
			pstmt.setInt(2, c.getId());
			;
			pstmt.executeUpdate();
		} catch (SQLException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} finally {
			DButil.close(conn);
		}

	}

	static void delete(int id) {
		String sql = "delete from haige where id=?";
		Connection conn = DButil.open();
		try {
			PreparedStatement pstmt = (PreparedStatement) conn.prepareStatement(sql);
			pstmt.setInt(1, id);
			;
			pstmt.executeUpdate();
		} catch (SQLException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} finally {
			DButil.close(conn);
		}
	}

	static Customer query(int id) {
		String sql = "select * from haige where id=?";
		Connection conn = DButil.open();
		try {
			PreparedStatement pstmt = (PreparedStatement) conn.prepareStatement(sql);
			pstmt.setInt(1, id);
			ResultSet rs = pstmt.executeQuery();
			if (rs.next()) {
				String name = rs.getString(2);
				String email = rs.getString(3);
				Customer c = new Customer();
				c.setId(id);
				c.setName(name);
				c.setEmail(email);
				return c;
			}
		} catch (SQLException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} finally {
			DButil.close(conn);
		}
		return null;
	}
}
package com.lpz.excel.mysql.test3.object;

public class Customer {
	int id;
	String name;
	String email;

	public int getId() {
		return id;
	}

	public void setId(int id) {
		this.id = id;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public String getEmail() {
		return email;
	}

	public void setEmail(String email) {
		this.email = email;
	}

}
package com.lpz.excel.mysql.test3ok;

import java.io.Serializable;

public class User implements Serializable{
	
	/**
	 * 
	 */
	private static final long serialVersionUID = 6364093152797749595L;

	private int id;
	
	private String phone;

	public int getId() {
		return id;
	}

	public void setId(int id) {
		this.id = id;
	}

	public String getPhone() {
		return phone;
	}

	public void setPhone(String phone) {
		this.phone = phone;
	}
	
	
	
}

POI读取Excel文件示例:

package com.lpz.excel.test1ok;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;

import com.lpz.excel.mysql.test3ok.JdbcConnection;
import com.lpz.excel.mysql.test3ok.User;

/**
 * 评估3处理流程:
 * 1、通过readAndInsertExcel()分成两波数据;一波纯号码、一波杂乱数据
 * 2、删除纯号码的重复记录;(20112 rows in set / 20329 - 217)
 * 		#Select MIN(id) From user Group By phone;
 * 		#Select MIN(id), phone, count(*) From user Group By phone HAVING count(*) > 1;
 * 		select count(1) from (Select MIN(id) From user Group By phone) t;
 * 		select count(1) from user;
  		Delete FROM usernb Where id NOT IN ( 
  			SELECT id from (Select MIN(id) AS id From usernb Group By phone) temp 
  		);		
 * 3、整理杂乱数据中的号码:
 * 		去重:(5885 / 5967 - 82)
 *		去除中文字符 (5787);
 *		删除长度小于11的数据记录(5787 / 5885 - 98)( 5774 / -13):
 *			SELECT phone FROM usernb WHERE CHAR_LENGTH(TRIM(phone)) < 11; 
 *			DELETE FROM usernb WHERE CHAR_LENGTH(TRIM(phone)) < 11;
 *		去重:( 5651 / 5771 - 120)
 *		程序整理以1开头的手机号,整理非数字结尾的数据;(程序处理)
 *		手动删除非号码的行,删除固话括号等杂乱信息(5771);
 *		截取前11位,插入11位后的数据;
 *		重新整理数据开头部分分数字情况,删除非数字开头部分等;
 *		手动检查替换修改中间等地方的特殊字符;
 *	4、再次删除重复的记录:
 * 	
 * 评估1处理流程:
 * 1、读入Excel字段;
 * 2、去除中文汉字空格;
 * 3-1、删除空记录:(-88 = 8049)
 * 		DELETE FROM `user` WHERE phone = "";
 * 3-2、删除长度小于11位的记录:(-43 = 8006)
 * 		DELETE FROM user WHERE CHAR_LENGTH(TRIM(phone)) < 11;
 * 4、整理规范,删除头部尾部多余字符:删除首尾非数字字符;(程序处理);
 * 5、程序整理非1开头的手机号;
 * 5、手动检查删除固话等无用号码,及中间特殊字符;
 * 6、删除重复的记录:(7329 / -672)
 * 		Delete FROM user Where id NOT IN ( 
  			SELECT id from (Select MIN(id) AS id From user Group By phone) temp 
  		);
 * 
 * @author lpz
 *
 */
public class ReadExcelForXSSF {

	public static int phoneListSize = 30000;
	public static int phoneListNBSize = 10000;
	
	
//	public static String fileNamePath = "g:/test.xlsx";
	public static String fileNamePath = "g:/评估1.xlsx";
	
	/**
	 * 
	 * @param args
	 */
	public static void main(String[] args) {
		
		// 读取数据
//		long startTime = System.currentTimeMillis();  
//		new ReadExcelForXSSF().readAndInsertExcel();
//		
//		long endTime = System.currentTimeMillis();  
//        System.out.printf("readExcel and insert 用时:%sms-------------------------------", (endTime - startTime));  
//        System.out.println();
        
        // 处理数据
        long startTime = System.currentTimeMillis();  
        new ReadExcelForXSSF().dealWithUserNB();
        
        long endTime = System.currentTimeMillis();  
        System.out.printf("dealWithUserNB 用时:%sms-----------------------", (endTime - startTime));  
        System.out.println();
		
		
//		System.out.println(removeChineseStr("   sdf发AA三大赛的  发生地方生8 88   "));
	}
	
	
	public void dealWithUserNB() {
		List<User> userNBList = JdbcConnection.queryUser("user");
		System.out.println("dealWithUserNB queryUser user size:" + userNBList.size());
		
		List<User> toUpdateUserNBList = new ArrayList<User>();
		
//		List<String> insertPhoneNB2List = new ArrayList<String>(phoneListNBSize);
		for (User user : userNBList) {
			String phone = user.getPhone();
//			phone = removeChineseStr(phone);
//			phone = phone.replace(":", "");
//			phone = phone.replace("、", "");
//			phone = phone.replace("/", "");
//			phone = phone.replace("  ", "");
//			phone = phone.replace("、", "");
//			if (phone.startsWith(" ")) {
//			    phone = phone.replace(" ", "").trim();
//			    user.setPhone(phone);
//			    toUpdateUserNBList.add(user);
//			}
//			if (phone.startsWith(";:")) {
//				phone = phone.replace(";:", "").trim();
//				user.setPhone(phone);
//				toUpdateUserNBList.add(user);
//			}
//			if (phone.startsWith("  ")) {
//				phone = phone.replace("  ", "").trim();
//				user.setPhone(phone);
//				toUpdateUserNBList.add(user);
//			}
//			if (!phone.startsWith("1")) {
//				System.out.println(user.getId()+ "---------" + user.getPhone());
//			}
			
			
			// 去除末尾非数字
			int length = phone.length();
			if (!Character.isDigit(phone.charAt(length-1))) {
				System.out.println(user.getId()+ "---------" + user.getPhone());
//				phone = phone.replace("()", "");
//				phone = phone.replace("  ", "");
//				phone = phone.replace(" ", "");
//				phone = phone.replace(";", "");
//				phone = phone.replace(",", "");
//				phone = phone.replace(";)", "");
//				user.setPhone(phone.trim());
//				toUpdateUserNBList.add(user);
			}
			
			if (phone.endsWith(".") || phone.endsWith(",") || phone.endsWith(",")) {
				phone = phone.substring(0, phone.length() - 1);
				user.setPhone(phone.trim());
				toUpdateUserNBList.add(user);
			}
			
			
			
//			if (phone.length() > 11) {
//				// 拆分成多部分,插入
//				String substr1 = phone.substring(0, 11);
//				insertPhoneNB2List.add(substr1);
//				//
//				String substr2 = phone.substring(11, phone.length());
//				if (substr2.length() < 11) {
//					// 丢弃
//					System.out.println("长度小于11,丢弃:" + substr2 + ", phone:" + phone);
//				} else {
//					// >=11
//					insertPhoneNB2List.add(substr2);
//				}
//			} else {
//				// 直接加进来
//				insertPhoneNB2List.add(phone);
//			}
			
			
		}
		
//		JdbcConnection.insertBatch(insertPhoneNB2List, "user");
		
		JdbcConnection.updateBatch(toUpdateUserNBList, "user");
		
		
	}
	
	
	
	/**
	 * 
	 */
	public void readAndInsertExcel() {
		// 读取文件
		File file = new File(fileNamePath);
		if (!file.exists()) {
			System.out.println("file not exist!!!");
			return;
		}
		// 解析Excel
		InputStream inputStream = null;
		Workbook workbook = null;
		try {
			inputStream = new FileInputStream(file);
			workbook = WorkbookFactory.create(inputStream);
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			if (null != inputStream) {
				try {
					inputStream.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
			
		// list for batch
		List<String> phoneList = new ArrayList<String>(phoneListSize);
//		List<String> phoneNBList = new ArrayList<String>(phoneListNBSize);
		// 组装获取
		acquirePhone(workbook, phoneList, null);
		
		
		// db table1
		JdbcConnection.insertBatch(phoneList, "user");
		// db table2 需要二次处理的手机号
//		JdbcConnection.insertBatch(phoneNBList, "usernb");

//		// 将修改好的数据保存
//		OutputStream out = new FileOutputStream(file);
//		workbook.write(out);
		
	}
	
	/**
	 * 
	 * @param rowLength
	 * @param sheet
	 * @param row
	 * @param cell
	 * @param nullRow
	 * @param nullCell
	 * @param nullStrValue
	 * @param phoneList
	 * @param phoneNBList
	 */
	private void acquirePhone(Workbook workbook, List<String> phoneList, List<String> phoneNBList){
		// 工作表对象,sheet表单
		Sheet sheet = workbook.getSheetAt(11);
		// 总行数
		int rowLength = sheet.getLastRowNum() + 1;
		// 工作表的列
		Row row = sheet.getRow(0);
		// 总列数
		int colLength = row.getLastCellNum();
		// 得到指定的单元格
		Cell cell = row.getCell(0);
		// 得到单元格样式
//		CellStyle cellStyle = cell.getCellStyle();
		System.out.println("行数:" + rowLength + ", 列数:" + colLength);
		
		int nullRow = 0;
		int nullCell = 0;
		int nullStrValue = 0;
		for (int i = 0; i < rowLength; i++) {
			row = sheet.getRow(i);
			// 只有一列数据
//			for (int j = 0; j < colLength; j++) {
			// 如果为空行,则跳过
			if (null == row) {
				nullRow++ ;
				continue;
			}
			cell = row.getCell(0);
			// Excel数据Cell有不同的类型,当我们试图从一个数字类型的Cell读取出一个字符串时就有可能报异常: Cannot get a STRING value from a NUMERIC cell
			// 将所有的需要读的Cell表格设置为String格式
			if (cell != null) {
				cell.setCellType(CellType.STRING);
			} else {
				nullCell++;
				// 如果为空列,则跳过
				continue;
			}
			// 获取值
			String stringCellValue = cell.getStringCellValue().trim();
//			System.out.println(stringCellValue + "----" + i);
			if (isStrEmpty(stringCellValue)) {
				nullStrValue++;
				continue;
			}
			
			phoneList.add(stringCellValue);
			// 存入数据库
//			if (isMobileNum(stringCellValue)) {
//				phoneList.add(stringCellValue);
//			} else {
//				phoneNBList.add(stringCellValue);
//			}
//			}
		}
		// 
		System.out.printf("nullRow:%s, nullCell:%s, nullStrValue:%s", nullRow, nullCell, nullStrValue);
		System.out.println();
	}
	
	/**
	 * 
	 * @param str
	 * @return
	 */
	public static Boolean isStrEmpty(String str) {
		return null == str || str.trim() == "";
	}
	
	
	/**
	 * 校验手机号格式
	 *
	 * @param number
	 * @return
	 */
	public static boolean isMobileNum(String number) {
	    /*
	    移动:134、135、136、137、138、139、150、151、157(TD)、158、159、187、188
	    联通:130、131、132、152、155、156、185、186
	    电信:133、153、177、178、180、189、(1349卫通)
	    总结起来就是第一位必定为1,第二位必定为3或5或8,其他位置的可以为0-9
	    */
	    String num = "[1][34578]\\d{9}";//"[1]"代表第1位为数字1,"[3578]"代表第二位可以为3、5、7、8中
	                                    // 的一个,"\\d{9}"代表后面是可以是0~9的数字,有9位。
//	    if (null == number || "" == number.trim()) {
//	        return false;
//	    } else {
	        //matches():字符串是否在给定的正则表达式匹配
	        return number.matches(num);
//	    }
	}
	
	private static String REGEX_CHINESE = "[\u4e00-\u9fa5]";// 中文正则
	
	/**
	 * // 去除中文
	 * @param str
	 * @return
	 */
	public static String removeChineseStr(String str) {
        Pattern pat = Pattern.compile(REGEX_CHINESE);
        Matcher mat = pat.matcher(str);
        return mat.replaceAll("").trim();
    }


}

PS:网上找的小结:

     * HSSFWorkBook:操作2003版本以前的(包括2003版本),扩展名.xls,该类在org.apache.poi:poi中
     * XSSFWorkBook:操作2007版本以后的(包括2007版本),拓展名.xlsx,该类在org.apache.poi:poi-ooxml中
     * SXSSFWorkBook:对于海量的数据进行操作



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值