使用java 语言 提取excel 中的手机号码,xls 格式以及xlsx 格式的excel 文件
pom.xml 文件
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>cn.sfy</groupId>
<artifactId>sfy-excel</artifactId>
<packaging>pom</packaging>
<version>1.0-SNAPSHOT</version>
<name>sfy-excel</name>
<!-- FIXME change it to the project's website -->
<url>http://www.example.com</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.0.1</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.8</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.0.1</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.9</version>
</dependency>
</dependencies>
<build>
<pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
<plugins>
<!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.1.0</version>
</plugin>
<!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.1</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
<!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
<plugin>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
<plugin>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>
ExcelUtils.class
package cn.sfy.utils;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ooxml.POIXMLDocument;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.*;
import java.math.BigDecimal;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
/**
* <p>
* Excel 工具类
* </p>
*
* @package: cn.sfy.utils
* @description:
* @author: james9203@163.com
* @date: Created in 2021/3/12 17:09
* @copyright: Copyright (c) 2021
* @version: V1.0
* @modified: james9203@163.com
*/
public class ExcelUtils {
private static final String XLS = "xls";
private static final String XLSX = "xlsx";
private static final DateFormat FORMAT = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
/**
* 输出数据到自定义模版的Excel输出流
*
* @param excelTemplate 自定义模版文件
* @param data 数据
* @param outputStream Excel输出流
* @throws IOException 错误时抛出异常,由调用者处理
*/
public static void writeDataToTemplateOutputStream(File excelTemplate, List<List<Object>> data, OutputStream outputStream) throws Exception {
Workbook book = ExcelUtils.getWorkbookFromExcel(excelTemplate);
ExcelUtils.writeDataToWorkbook(null, data, book, 0);
ExcelUtils.writeWorkbookToOutputStream(book, outputStream);
}
/**
* 从Excel文件获取Workbook对象
*
* @param excelFile Excel文件
* @return Workbook对象
* @throws IOException 错误时抛出异常,由调用者处理
*/
public static Workbook getWorkbookFromExcel(File excelFile) throws Exception {
try {
Workbook workbook = null;
try (
InputStream inputStream = new FileInputStream(excelFile);
) {
String name = excelFile.getName();
if(name.endsWith("xls")){
workbook = new HSSFWorkbook(inputStream);
if(workbook!=null){
return workbook;
}
}
workbook = WorkbookFactory.create(inputStream);
if(workbook != null){
return workbook;
}
else {
throw new IOException("文件类型错误");
}
}
}catch (Exception e){
throw e;
}
}
/**
* 把Workbook对象内容输出到Excel文件
*
* @param book Workbook对象
* @param file Excel文件
* @throws FileNotFoundException 找不到文件异常,文件已创建,实际不存在该异常
* @throws IOException 输入输出异常
*/
public static void writeWorkbookToFile(Workbook book, File file) throws FileNotFoundException, IOException {
if (!file.exists()) {
if (!file.getParentFile().exists()) {
file.getParentFile().mkdirs();
}
file.createNewFile();
}
try (
OutputStream outputStream = new FileOutputStream(file);
) {
writeWorkbookToOutputStream(book, outputStream);
}
}
/**
* 把Workbook对象输出到Excel输出流
*
* @param book Workbook对象
* @param outputStream Excel输出流
* @throws IOException 错误时抛出异常,由调用者处理
*/
public static void writeWorkbookToOutputStream(Workbook book, OutputStream outputStream) throws IOException {
book.write(outputStream);
}
/**
* 输出数据到Workbook对象中指定页码
*
* @param title 标题,写在第一行,可传null
* @param data 数据
* @param book Workbook对象
* @param page 输出数据到Workbook指定页码的页面数
*/
public static void writeDataToWorkbook(List<String> title, List<List<Object>> data, Workbook book, int page) {
Sheet sheet = book.getSheetAt(page);
Row row = null;
Cell cell = null;
// 设置表头
if (null != title && !title.isEmpty()) {
row = sheet.getRow(0);
if (null == row) {
row = sheet.createRow(0);
}
for (int i = 0; i < title.size(); i++) {
cell = row.getCell(i);
if (null == cell) {
cell = row.createCell(i);
}
cell.setCellValue(title.get(i));
}
}
List<Object> rowData = null;
for (int i = 0; i < data.size(); i++) {
row = sheet.getRow(i + 1);
if (null == row) {
row = sheet.createRow(i + 1);
}
rowData = data.get(i);
if (null == rowData) {
continue;
}
for (int j = 0; j < rowData.size(); j++) {
cell = row.getCell(j);
if (null == cell) {
cell = row.createCell(j);
}
setValue(cell, rowData.get(j));
}
}
}
/**
* 读取Excel文件第一页
*
* @param pathname 文件路径名
* @return 第一页数据集合
* @throws IOException 错误时抛出异常,由调用者处理
*/
public static List<List<Object>> readExcelFirstSheet(String pathname) throws IOException {
File file = new File(pathname);
return readExcelFirstSheet(file);
}
/**
* 读取Excel文件第一页
*
* @param file Excel文件
* @return 第一页数据集合
* @throws IOException 错误时抛出异常,由调用者处理
*/
public static List<List<Object>> readExcelFirstSheet(File file) throws IOException {
try (
InputStream inputStream = new FileInputStream(file);
) {
if (file.getName().endsWith(XLS)) {
return readXlsFirstSheet(inputStream);
}
else if (file.getName().endsWith(XLSX)) {
return readXlsxFirstSheet(inputStream);
}
else {
throw new IOException("文件类型错误");
}
}
}
/**
* 读取xls格式Excel文件第一页
*
* @param inputStream Excel文件输入流
* @return 第一页数据集合
* @throws IOException 错误时抛出异常,由调用者处理
*/
public static List<List<Object>> readXlsFirstSheet(InputStream inputStream) throws IOException {
Workbook workbook = new HSSFWorkbook(inputStream);
return readExcelFirstSheet(workbook);
}
/**
* 读取xlsx格式Excel文件第一页
*
* @param inputStream Excel文件输入流
* @return 第一页数据集合
* @throws IOException 错误时抛出异常,由调用者处理
*/
public static List<List<Object>> readXlsxFirstSheet(InputStream inputStream) throws IOException {
Workbook workbook = new XSSFWorkbook(inputStream);
return readExcelFirstSheet(workbook);
}
/**
* 读取Workbook第一页
*
* @param book Workbook对象
* @return 第一页数据集合
*/
public static List<List<Object>> readExcelFirstSheet(Workbook book) {
return readExcel(book, 0);
}
/**
* 读取指定页面的Excel
*
* @param book Workbook对象
* @param page 页码
* @return 指定页面数据集合
*/
public static List<List<Object>> readExcel(Workbook book, int page) {
List<List<Object>> list = new ArrayList<>();
Sheet sheet = book.getSheetAt(page);
for (int i = 0; i <= sheet.getLastRowNum(); i++) {
Row row = sheet.getRow(i);
// 如果当前行为空,则加入空,保持行号一致
if (null == row) {
list.add(null);
continue;
}
List<Object> columns = new ArrayList<>();
for (int j = 0; j < row.getLastCellNum(); j++) {
Cell cell = row.getCell(j);
columns.add(getValue(cell));
}
list.add(columns);
}
return list;
}
/**
* 解析单元格中的值
*
* @param cell 单元格
* @return 单元格内的值
*/
private static Object getValue(Cell cell) {
if (null == cell) {
return null;
}
Object value = null;
switch (cell.getCellType()) {
case BOOLEAN:
value = cell.getBooleanCellValue();
break;
case NUMERIC:
// 日期类型,转换为日期
if (DateUtil.isCellDateFormatted(cell)) {
value = cell.getDateCellValue();
}
// 数值类型
else {
// 默认返回double,创建BigDecimal返回准确值
value = new BigDecimal(cell.getNumericCellValue());
}
break;
default:
value = cell.toString();
break;
}
return value;
}
/**
* 设置单元格值
*
* @param cell 单元格
* @param value 值
*/
private static void setValue(Cell cell, Object value) {
if (null == cell) {
return;
}
if (null == value) {
cell.setCellValue((String) null);
}
else if (value instanceof Boolean) {
cell.setCellValue((Boolean) value);
}
else if (value instanceof Date) {
cell.setCellValue(FORMAT.format((Date) value));
}
else if (value instanceof Double) {
cell.setCellValue((Double) value);
}
else {
cell.setCellValue(value.toString());
}
}
}
App.class
package cn.sfy;
import cn.sfy.utils.ExcelUtils;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import java.io.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Hello world!
*
*/
public class App
{
public static List<File> files =new ArrayList<>();
public static void getFiles(File dir) {
//如果当前文件或目录存在
if(dir.exists()){
//如果是目录,则:
if(dir.isDirectory()){
//打印当前目录的路径
// System.out.println(dir);
//获取该目录下的所有文件和目录组成的File数组
File[] files = dir.listFiles();
//递归遍历每一个子文件
for(File file : files){
getFiles(file);
}
}
else{
files.add(dir);
// System.out.println(dir);
}
}
}
public static String getPhone(String sParam){
if(sParam.length()<=0)
return "";
// Pattern pattern = Pattern.compile("1[345678]\\d{9}");
Pattern pattern = Pattern.compile("1[0123456789]\\d{9}");
Matcher matcher = pattern.matcher(sParam);
// System.out.println(matcher);
StringBuffer bf = new StringBuffer();
while (matcher.find()) {
bf.append(matcher.group()).append(",");
}
int len = bf.length();
if (len > 0) {
bf.deleteCharAt(len - 1);
}
// System.out.println(bf.toString());
return bf.toString();
}
public static void saveCsv(String file,Map<String,Mobile> map){
try {
FileOutputStream fos = new FileOutputStream(file);
OutputStreamWriter osw = new OutputStreamWriter(fos, "GBK");
CSVFormat csvFormat = CSVFormat.DEFAULT.withHeader("手机号码", "次数");
CSVPrinter csvPrinter = new CSVPrinter(osw, csvFormat);
// csvPrinter = CSVFormat.DEFAULT.withHeader("姓名", "年龄", "家乡").print(osw);
Iterator<Map.Entry<String, Mobile>> iterator = map.entrySet().iterator();
while (iterator.hasNext()){
Map.Entry<String, Mobile> next = iterator.next();
Mobile value = next.getValue();
csvPrinter.printRecord(value.getNumber(),value.getCount());
}
csvPrinter.flush();
csvPrinter.close();
}catch (Exception e){
e.printStackTrace();
}
}
public static void main( String[] args )
{
Map<String,Mobile> mobileMap = new HashMap<>();
getFiles(new File("D:\\testdir"));
int totalfile = files.size();
for (int i = 0; i < files.size(); i++) {
File file = files.get(i);
String fileName = file.getName();
String suffix = fileName.substring(fileName.lastIndexOf(".") + 1);
if(suffix.equals("xls")||suffix.equals("xlsx")){
try {
Workbook workbook = ExcelUtils.getWorkbookFromExcel(file);
int activeSheetIndex = workbook.getNumberOfSheets();
for (int ii = 0; ii < activeSheetIndex; ii++) {
String sheetName = workbook.getSheetName(ii);
Sheet sheet = workbook.getSheet(sheetName);
int lastRowNum = sheet.getLastRowNum();
for (int i1 = 0; i1 < lastRowNum; i1++) {
Row row = sheet.getRow(i1);
if(row!=null){
short lastCellNum = row.getLastCellNum();
for (int j = 0; j < lastCellNum; j++) {
Cell cell = row.getCell(j);
String stringCellValue = cell.toString();
if(stringCellValue != null){
stringCellValue = stringCellValue.replaceAll(" ","");
String phone = getPhone(stringCellValue);
if(StringUtils.isNotEmpty(phone)){
String[] split = phone.split(",");
for (int i2 = 0; i2 < split.length; i2++) {
String number_phone = split[i2];
Mobile mobile = mobileMap.get(number_phone);
if(mobile==null){
mobileMap.put(number_phone,new Mobile(number_phone,1L));
}else{
mobile.setCount(mobile.getCount()+1);
mobileMap.put(number_phone,mobile);
}
}
// System.out.println(mobile);
}
}
}
// System.out.println();
}
}
}
} catch (Exception e) {
String message = e.getMessage();
if(message!=null&&message.contains("Your file appears not to be a valid OLE2 document")){
//文件可能是直接改后缀名的csv 文件
try {
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "gbk"));//构造一个BufferedReader类来读取文件
String s = null;
while((s = br.readLine())!=null){//使用readLine方法,一次读一行
s = s.replaceAll(" ","");
String phone = getPhone(s);
String[] split = phone.split(",");
System.out.println(phone);
for (int i2 = 0; i2 < split.length; i2++) {
String number_phone = split[i2];
if(StringUtils.isEmpty(phone)){
continue;
}
Mobile mobile = mobileMap.get(number_phone);
if(mobile==null){
mobileMap.put(number_phone,new Mobile(number_phone,1L));
}else{
mobile.setCount(mobile.getCount()+1);
mobileMap.put(number_phone,mobile);
}
}
}
} catch (Exception e1) {
e1.printStackTrace();
}
System.out.println("=============");
System.out.println(message);
System.out.println(file);
System.out.println("=============");
}
}
// System.out.println(i);
// System.out.println("========================");
// System.out.println(file);
// System.out.println("========================");
} else if(suffix.equals("txt")){
try {
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "gbk"));//构造一个BufferedReader类来读取文件
String s = null;
while((s = br.readLine())!=null){//使用readLine方法,一次读一行
s = s.replaceAll(" ","");
String phone = getPhone(s);
String[] split = phone.split(",");
System.out.println(phone);
for (int i2 = 0; i2 < split.length; i2++) {
String number_phone = split[i2];
if(StringUtils.isEmpty(phone)){
continue;
}
Mobile mobile = mobileMap.get(number_phone);
if(mobile==null){
mobileMap.put(number_phone,new Mobile(number_phone,1L));
}else{
mobile.setCount(mobile.getCount()+1);
mobileMap.put(number_phone,mobile);
}
}
}
} catch (Exception e1) {
e1.printStackTrace();
}
System.out.println("=============");
System.out.println(file);
System.out.println("=============");
}
}
// System.out.println(mobileMap);
saveCsv("d:\\testcsv\\test2.csv",mobileMap);
// 手机号码 map 保存到csv
// System.out.println(mobileMap);
}
}
初次写文章,可能有些不清楚,如有问题加微信