使用java 语言 提取excel 中的手机号码,xls 格式以及xlsx 格式的excel 文件

使用java 语言 提取excel 中的手机号码,xls 格式以及xlsx 格式的excel 文件

pom.xml 文件 


<?xml version="1.0" encoding="UTF-8"?>

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>cn.sfy</groupId>
  <artifactId>sfy-excel</artifactId>
    <packaging>pom</packaging>
    <version>1.0-SNAPSHOT</version>
    <name>sfy-excel</name>
  <!-- FIXME change it to the project's website -->
  <url>http://www.example.com</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.7</maven.compiler.source>
    <maven.compiler.target>1.7</maven.compiler.target>
  </properties>
  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-ooxml</artifactId>
      <version>4.0.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-csv</artifactId>
      <version>1.8</version>
    </dependency>
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi</artifactId>
      <version>4.0.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-lang3</artifactId>
      <version>3.9</version>
    </dependency>
  </dependencies>

  <build>
    <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
      <plugins>
        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
        <plugin>
          <artifactId>maven-clean-plugin</artifactId>
          <version>3.1.0</version>
        </plugin>
        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
        <plugin>
          <artifactId>maven-resources-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>3.8.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-surefire-plugin</artifactId>
          <version>2.22.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-jar-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-install-plugin</artifactId>
          <version>2.5.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-deploy-plugin</artifactId>
          <version>2.8.2</version>
        </plugin>
        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
        <plugin>
          <artifactId>maven-site-plugin</artifactId>
          <version>3.7.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-project-info-reports-plugin</artifactId>
          <version>3.0.0</version>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>
</project>

 

ExcelUtils.class

package cn.sfy.utils;

import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ooxml.POIXMLDocument;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import java.io.*;
import java.math.BigDecimal;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * <p>
 *  Excel 工具类
 * </p>
 *
 * @package: cn.sfy.utils
 * @description:
 * @author: james9203@163.com    
 * @date: Created in  2021/3/12  17:09
 * @copyright: Copyright (c) 2021
 * @version: V1.0
 * @modified: james9203@163.com 
 */
public class ExcelUtils {

    private static final String XLS = "xls";
    private static final String XLSX = "xlsx";
    private static final DateFormat FORMAT = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");

    /**
     * 输出数据到自定义模版的Excel输出流
     *
     * @param excelTemplate 自定义模版文件
     * @param data 数据
     * @param outputStream Excel输出流
     * @throws IOException 错误时抛出异常,由调用者处理
     */
    public static void writeDataToTemplateOutputStream(File excelTemplate, List<List<Object>> data, OutputStream outputStream) throws Exception {
        Workbook book = ExcelUtils.getWorkbookFromExcel(excelTemplate);
        ExcelUtils.writeDataToWorkbook(null, data, book, 0);
        ExcelUtils.writeWorkbookToOutputStream(book, outputStream);
    }

    /**
     * 从Excel文件获取Workbook对象
     *
     * @param excelFile Excel文件
     * @return Workbook对象
     * @throws IOException 错误时抛出异常,由调用者处理
     */
    public static Workbook getWorkbookFromExcel(File excelFile) throws Exception {
        try {
            Workbook workbook = null;
            try (
                    InputStream inputStream = new FileInputStream(excelFile);
            ) {
                String name = excelFile.getName();
                if(name.endsWith("xls")){
                    workbook = new HSSFWorkbook(inputStream);
                    if(workbook!=null){
                        return  workbook;
                    }
                }
                workbook = WorkbookFactory.create(inputStream);
                if(workbook  != null){
                    return workbook;
                }
                else {
                    throw new IOException("文件类型错误");
                }
            }
        }catch (Exception e){
            throw e;
        }
    }

    /**
     * 把Workbook对象内容输出到Excel文件
     *
     * @param book Workbook对象
     * @param file Excel文件
     * @throws FileNotFoundException 找不到文件异常,文件已创建,实际不存在该异常
     * @throws IOException 输入输出异常
     */
    public static void writeWorkbookToFile(Workbook book, File file) throws FileNotFoundException, IOException {
        if (!file.exists()) {
            if (!file.getParentFile().exists()) {
                file.getParentFile().mkdirs();
            }
            file.createNewFile();
        }
        try (
                OutputStream outputStream = new FileOutputStream(file);
        ) {
            writeWorkbookToOutputStream(book, outputStream);
        }
    }

    /**
     * 把Workbook对象输出到Excel输出流
     *
     * @param book Workbook对象
     * @param outputStream Excel输出流
     * @throws IOException 错误时抛出异常,由调用者处理
     */
    public static void writeWorkbookToOutputStream(Workbook book, OutputStream outputStream) throws IOException {
        book.write(outputStream);
    }

    /**
     * 输出数据到Workbook对象中指定页码
     *
     * @param title 标题,写在第一行,可传null
     * @param data 数据
     * @param book Workbook对象
     * @param page 输出数据到Workbook指定页码的页面数
     */
    public static void writeDataToWorkbook(List<String> title, List<List<Object>> data, Workbook book, int page) {
        Sheet sheet = book.getSheetAt(page);

        Row row = null;
        Cell cell = null;

        // 设置表头
        if (null != title && !title.isEmpty()) {
            row = sheet.getRow(0);
            if (null == row) {
                row = sheet.createRow(0);
            }

            for (int i = 0; i < title.size(); i++) {
                cell = row.getCell(i);
                if (null == cell) {
                    cell = row.createCell(i);
                }
                cell.setCellValue(title.get(i));
            }
        }

        List<Object> rowData = null;
        for (int i = 0; i < data.size(); i++) {

            row = sheet.getRow(i + 1);
            if (null == row) {
                row = sheet.createRow(i + 1);
            }

            rowData = data.get(i);
            if (null == rowData) {
                continue;
            }
            for (int j = 0; j < rowData.size(); j++) {
                cell = row.getCell(j);
                if (null == cell) {
                    cell = row.createCell(j);
                }
                setValue(cell, rowData.get(j));
            }
        }
    }

    /**
     * 读取Excel文件第一页
     *
     * @param pathname 文件路径名
     * @return 第一页数据集合
     * @throws IOException 错误时抛出异常,由调用者处理
     */
    public static List<List<Object>> readExcelFirstSheet(String pathname) throws IOException {
        File file = new File(pathname);
        return readExcelFirstSheet(file);

    }

    /**
     * 读取Excel文件第一页
     *
     * @param file Excel文件
     * @return 第一页数据集合
     * @throws IOException 错误时抛出异常,由调用者处理
     */
    public static List<List<Object>> readExcelFirstSheet(File file) throws IOException {
        try (
                InputStream inputStream = new FileInputStream(file);
        ) {

            if (file.getName().endsWith(XLS)) {
                return readXlsFirstSheet(inputStream);
            }

            else if (file.getName().endsWith(XLSX)) {
                return readXlsxFirstSheet(inputStream);
            }

            else {
                throw new IOException("文件类型错误");
            }
        }

    }

    /**
     * 读取xls格式Excel文件第一页
     *
     * @param inputStream Excel文件输入流
     * @return 第一页数据集合
     * @throws IOException 错误时抛出异常,由调用者处理
     */
    public static List<List<Object>> readXlsFirstSheet(InputStream inputStream) throws IOException {
        Workbook workbook = new HSSFWorkbook(inputStream);
        return readExcelFirstSheet(workbook);
    }

    /**
     * 读取xlsx格式Excel文件第一页
     *
     * @param inputStream Excel文件输入流
     * @return 第一页数据集合
     * @throws IOException 错误时抛出异常,由调用者处理
     */
    public static List<List<Object>> readXlsxFirstSheet(InputStream inputStream) throws IOException {
        Workbook workbook = new XSSFWorkbook(inputStream);
        return readExcelFirstSheet(workbook);
    }

    /**
     * 读取Workbook第一页
     *
     * @param book Workbook对象
     * @return 第一页数据集合
     */
    public static List<List<Object>> readExcelFirstSheet(Workbook book) {
        return readExcel(book, 0);
    }

    /**
     * 读取指定页面的Excel
     *
     * @param book Workbook对象
     * @param page 页码
     * @return 指定页面数据集合
     */
    public static List<List<Object>> readExcel(Workbook book, int page) {
        List<List<Object>> list = new ArrayList<>();
        Sheet sheet = book.getSheetAt(page);
        for (int i = 0; i <= sheet.getLastRowNum(); i++) {
            Row row = sheet.getRow(i);

            // 如果当前行为空,则加入空,保持行号一致
            if (null == row) {
                list.add(null);
                continue;
            }

            List<Object> columns = new ArrayList<>();
            for (int j = 0; j < row.getLastCellNum(); j++) {
                Cell cell = row.getCell(j);
                columns.add(getValue(cell));
            }

            list.add(columns);
        }

        return list;
    }

    /**
     * 解析单元格中的值
     *
     * @param cell 单元格
     * @return 单元格内的值
     */
    private static Object getValue(Cell cell) {
        if (null == cell) {
            return null;
        }
        Object value = null;
        switch (cell.getCellType()) {
            case BOOLEAN:
                value = cell.getBooleanCellValue();
                break;

            case NUMERIC:

                // 日期类型,转换为日期
                if (DateUtil.isCellDateFormatted(cell)) {
                    value = cell.getDateCellValue();
                }

                // 数值类型
                else {

                    // 默认返回double,创建BigDecimal返回准确值
                    value = new BigDecimal(cell.getNumericCellValue());
                }
                break;

            default:
                value = cell.toString();
                break;
        }

        return value;
    }

    /**
     * 设置单元格值
     *
     * @param cell 单元格
     * @param value 值
     */
    private static void setValue(Cell cell, Object value) {
        if (null == cell) {
            return;
        }

        if (null == value) {
            cell.setCellValue((String) null);
        }

        else if (value instanceof Boolean) {
            cell.setCellValue((Boolean) value);
        }

        else if (value instanceof Date) {
            cell.setCellValue(FORMAT.format((Date) value));
        }

        else if (value instanceof Double) {
            cell.setCellValue((Double) value);
        }

        else {
            cell.setCellValue(value.toString());
        }

    }

}

 

 

App.class 


 

package cn.sfy;

import cn.sfy.utils.ExcelUtils;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;

import java.io.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Hello world!
 *
 */
public class App 
{
    public  static    List<File> files =new ArrayList<>();

    public static void getFiles(File dir) {
        //如果当前文件或目录存在
        if(dir.exists()){
            //如果是目录,则:
            if(dir.isDirectory()){
                //打印当前目录的路径
//                System.out.println(dir);
                //获取该目录下的所有文件和目录组成的File数组
                File[] files = dir.listFiles();
                //递归遍历每一个子文件
                for(File file : files){
                    getFiles(file);
                }
            }
            else{
                files.add(dir);
//                System.out.println(dir);
            }
        }
    }
    public  static  String getPhone(String sParam){
        if(sParam.length()<=0)
            return "";
//        Pattern pattern = Pattern.compile("1[345678]\\d{9}");
        Pattern pattern = Pattern.compile("1[0123456789]\\d{9}");
        Matcher matcher = pattern.matcher(sParam);
//        System.out.println(matcher);
        StringBuffer bf = new StringBuffer();
        while (matcher.find()) {
            bf.append(matcher.group()).append(",");
        }
        int len = bf.length();
        if (len > 0) {
            bf.deleteCharAt(len - 1);
        }
//        System.out.println(bf.toString());
        return bf.toString();
    }

    public  static  void saveCsv(String file,Map<String,Mobile> map){
        try {
            FileOutputStream fos = new FileOutputStream(file);
            OutputStreamWriter osw = new OutputStreamWriter(fos, "GBK");

            CSVFormat csvFormat = CSVFormat.DEFAULT.withHeader("手机号码", "次数");
            CSVPrinter csvPrinter = new CSVPrinter(osw, csvFormat);

//        csvPrinter = CSVFormat.DEFAULT.withHeader("姓名", "年龄", "家乡").print(osw);
            Iterator<Map.Entry<String, Mobile>> iterator = map.entrySet().iterator();
            while (iterator.hasNext()){
                Map.Entry<String, Mobile> next = iterator.next();
                Mobile value = next.getValue();
                csvPrinter.printRecord(value.getNumber(),value.getCount());
            }
            csvPrinter.flush();
            csvPrinter.close();
        }catch (Exception e){
            e.printStackTrace();
        }
    }
    public static void main( String[] args )
    {
        Map<String,Mobile>  mobileMap  =  new HashMap<>();
        getFiles(new File("D:\\testdir"));
        int totalfile = files.size();
        for (int i = 0; i < files.size(); i++) {
            File file = files.get(i);
            String fileName = file.getName();
            String suffix = fileName.substring(fileName.lastIndexOf(".") + 1);
            if(suffix.equals("xls")||suffix.equals("xlsx")){
                try {
                    Workbook workbook =  ExcelUtils.getWorkbookFromExcel(file);
                    int activeSheetIndex = workbook.getNumberOfSheets();
                    for (int ii = 0; ii < activeSheetIndex; ii++) {
                        String sheetName = workbook.getSheetName(ii);
                        Sheet sheet = workbook.getSheet(sheetName);
                        int lastRowNum = sheet.getLastRowNum();
                        for (int i1 = 0; i1 < lastRowNum; i1++) {
                            Row row = sheet.getRow(i1);
                            if(row!=null){
                                short lastCellNum = row.getLastCellNum();
                                for (int j = 0; j < lastCellNum; j++) {
                                    Cell cell = row.getCell(j);
                                    String stringCellValue = cell.toString();
                                    if(stringCellValue != null){
                                        stringCellValue =  stringCellValue.replaceAll(" ","");
                                        String phone = getPhone(stringCellValue);
                                        if(StringUtils.isNotEmpty(phone)){
                                            String[] split = phone.split(",");
                                            for (int i2 = 0; i2 < split.length; i2++) {
                                                String number_phone  = split[i2];
                                                Mobile mobile = mobileMap.get(number_phone);
                                                if(mobile==null){
                                                    mobileMap.put(number_phone,new Mobile(number_phone,1L));
                                                }else{
                                                    mobile.setCount(mobile.getCount()+1);
                                                    mobileMap.put(number_phone,mobile);
                                                }
                                            }
//                                            System.out.println(mobile);
                                        }
                                    }
                                }
//                                System.out.println();
                            }
                        }

                    }
                } catch (Exception e) {
                    String message = e.getMessage();
                    if(message!=null&&message.contains("Your file appears not to be a valid OLE2 document")){
                        //文件可能是直接改后缀名的csv 文件
                        try {
                            BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "gbk"));//构造一个BufferedReader类来读取文件
                            String s = null;
                            while((s = br.readLine())!=null){//使用readLine方法,一次读一行
                                s = s.replaceAll(" ","");
                                String phone = getPhone(s);
                                String[] split = phone.split(",");
                                System.out.println(phone);
                                for (int i2 = 0; i2 < split.length; i2++) {
                                    String number_phone  = split[i2];
                                    if(StringUtils.isEmpty(phone)){
                                        continue;
                                    }
                                    Mobile mobile = mobileMap.get(number_phone);
                                    if(mobile==null){
                                        mobileMap.put(number_phone,new Mobile(number_phone,1L));
                                    }else{
                                        mobile.setCount(mobile.getCount()+1);
                                        mobileMap.put(number_phone,mobile);
                                    }
                                }
                            }
                        } catch (Exception e1) {
                            e1.printStackTrace();
                        }
                        System.out.println("=============");
                        System.out.println(message);
                        System.out.println(file);
                        System.out.println("=============");
                    }

                }
//                System.out.println(i);
//                System.out.println("========================");
//                System.out.println(file);
//                System.out.println("========================");

            } else if(suffix.equals("txt")){
                try {
                    BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "gbk"));//构造一个BufferedReader类来读取文件
                    String s = null;
                    while((s = br.readLine())!=null){//使用readLine方法,一次读一行
                        s = s.replaceAll(" ","");
                        String phone = getPhone(s);
                        String[] split = phone.split(",");
                        System.out.println(phone);
                        for (int i2 = 0; i2 < split.length; i2++) {
                            String number_phone  = split[i2];
                            if(StringUtils.isEmpty(phone)){
                                continue;
                            }
                            Mobile mobile = mobileMap.get(number_phone);
                            if(mobile==null){
                                mobileMap.put(number_phone,new Mobile(number_phone,1L));
                            }else{
                                mobile.setCount(mobile.getCount()+1);
                                mobileMap.put(number_phone,mobile);
                            }
                        }
                    }
                } catch (Exception e1) {
                    e1.printStackTrace();
                }
                System.out.println("=============");
                System.out.println(file);
                System.out.println("=============");

            }
        }
//        System.out.println(mobileMap);
        saveCsv("d:\\testcsv\\test2.csv",mobileMap);
        // 手机号码 map 保存到csv
//        System.out.println(mobileMap);
    }
}

 

初次写文章,可能有些不清楚,如有问题加微信

/** * 此代码是完成从excel导入电话号码,将正确的电话号码保存到set集合,因为set集合对于重复的值会覆盖,所以达到了去重复的值的用例,并累计了不正确的电话号码的个数,对电话号码进行了验证有效性。所需要的 dom4j-1.6.1.jar;geronimo-stax-api_1.0_spec-1.0.jar;poi-3.7-20101029.jar;poi-ooxml-3.7-20101029.jar;poi-ooxml-schemas-3.7-20101029.jar;xmlbeans-2.3.0.jar; */ public static void main(String[] args) { Long errorMobileTotal=0L; // 保存正确的电话号码 Set<String> mobileSet = new HashSet<String>(); try { XSSFWorkbook wb = new XSSFWorkbook("E:/workbook1.xlsx"); XSSFSheet sheet = wb.getSheetAt(0); XSSFRow row = null; XSSFCell cell = null; String mobileStr=""; for (int i = 0; i <= sheet.getLastRowNum(); i++) { row = sheet.getRow(i); //System.out.print("第" + i + "行共" + row.getLastCellNum() +"列: "); for (int y = 0; y < row.getLastCellNum(); y++) { cell = row.getCell(y); // 设置字段为字符类型 cell.setCellType(XSSFCell.CELL_TYPE_STRING); // 判断储存格的格式 if (cell != null) { // 取得单元格的值 mobileStr = cell.getStringCellValue(); // 对手机号码进行验证身份正确 if(isMobileNO(mobileStr)) { // 保存正确的手机号码 mobileSet.add(mobileStr); System.out.println("号码"+mobileStr+"正确"); } else { // 累计不正确的电话号码的个数 errorMobileTotal++; System.out.println("不正确的电话号码个数:"+errorMobileTotal); System.out.println("号码"+mobileStr+"不正确"); } } // end (cell != null) }// end 遍历当前行 } // end 遍历当前工作单元sheet System.out.println("总共的行数:"+ (Long.valueOf(sheet.getLastRowNum())+1)); } catch (Exception e) { e.printStackTrace(); } // 因为要去除重复的所以可能有存在替换的字符 System.out.println("不正确的电话号码个数:"+errorMobileTotal); System.out.println("正确的电话号码个数:" + mobileSet.size()); } public static boolean isMobileNO(String mobiles){ Pattern p = Pattern.compile("^(\\+86)*0*((13[0-9])|(15[^4,\\D])|(18[0,5-9]))\\d{8}$"); Matcher m = p.matcher(mobiles); return m.matches(); }
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值