友情提示:为了方便,这里整理了导入本地文档(txt、doc、docx、xls、xlsx)并提取其文本内容的功能,以备不时之需。
一、所需jar包
这里只介绍通过 pom 文件引入 jar 包的配置,如下:
<!-- Apache POI core module: HSSF (.xls) plus the shared org.apache.poi.ss.usermodel API. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>
<!-- Scratchpad module: provides HWPF, used to read legacy Word (.doc) files. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.17</version>
</dependency>
<!-- OOXML module: XSSF (.xlsx), XWPF (.docx) and the openxml4j OPCPackage API. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>
<!-- Generated OOXML schema classes that poi-ooxml relies on; keep the version aligned with it. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.17</version>
</dependency>
二、测试代码
package com.cdvcloud.thread.file;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.springframework.util.StringUtils;
/**
 * Extracts plain text from local txt, doc, docx, xls and xlsx files.
 *
 * <p>Every reader handles its own failures: the exception is printed with
 * {@code printStackTrace()} and a fallback value ({@code null}, or {@code ""} for Excel) is
 * returned instead of propagating the error to the caller.
 */
public class FileImport {

    /** Charset used for txt files when the caller does not supply one. */
    private static final String DEFAULT_CHARSET = "GB2312";

    /** Manual smoke test: reads one sample file of each supported format and prints the text. */
    public static void main(String[] args) {
        String path = "C:/Users/test/Desktop/ceshi2.docx";
        String path2 = "C:/Users/test/Desktop/test2.xlsx";
        String path3 = "C:/Users/test/Desktop/test.xls";
        String path4 = "C:/Users/test/Desktop/test.txt";
        String path5 = "C:/Users/test/Desktop/ceshi.doc";
        String string = importFile(path, null);
        String string2 = importFile(path2, "GB2312");
        String string3 = importFile(path3, null);
        String string4 = importFile(path4, null);
        String string5 = importFile(path5, null);
        System.out.println("docx===>>" + string);
        System.out.println("xlsx===>>" + string2);
        System.out.println("xls===>>" + string3);
        System.out.println("txt===>>" + string4);
        System.out.println("doc===>>" + string5);
    }

    /**
     * Single entry point: picks a reader from the file extension and returns the extracted text.
     *
     * <p>The extension is matched case-insensitively, so {@code REPORT.XLSX} is handled the same
     * way as {@code report.xlsx}.
     *
     * @param path    path of the file to read
     * @param charset charset name for txt files; may be {@code null} or empty to use the default
     * @return the extracted text, or {@code null} when {@code path} is empty, the extension is
     *         not supported, or the selected reader failed
     */
    static String importFile(String path, String charset) {
        if (StringUtils.isEmpty(path)) {
            return null;
        }
        String resTextString = null;
        try {
            // Without a '.', lastIndexOf returns -1 and the whole path is treated as the type,
            // which matches none of the branches below.
            String type = path.substring(path.lastIndexOf(".") + 1);
            if ("txt".equalsIgnoreCase(type)) {
                resTextString = importTxt(path, charset);
            } else if ("doc".equalsIgnoreCase(type)) {
                resTextString = importWord3(path);
            } else if ("docx".equalsIgnoreCase(type)) {
                resTextString = importWord7(path);
            } else if ("xls".equalsIgnoreCase(type) || "xlsx".equalsIgnoreCase(type)) {
                resTextString = importExcel(path, charset);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return resTextString;
    }

    /**
     * Reads the text of a Word 2003 (.doc) file.
     *
     * @param path path of the .doc file
     * @return the document text, or {@code null} if the file could not be read
     */
    static String importWord3(String path) {
        // try-with-resources closes the document as well as the stream, even on failure.
        try (FileInputStream inputStream = new FileInputStream(path);
             HWPFDocument doc = new HWPFDocument(inputStream)) {
            return doc.getText().toString();
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads the text of a Word 2007+ (.docx) file.
     *
     * @param path path of the .docx file
     * @return the document text, or {@code null} if the file could not be read
     */
    static String importWord7(String path) {
        OPCPackage openPackage = null;
        try {
            openPackage = POIXMLDocument.openPackage(path);
            XWPFWordExtractor word = new XWPFWordExtractor(openPackage);
            return word.getText();
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            if (openPackage != null) {
                // The package was opened from a path in writable mode, where close() would save
                // it back over the source file. revert() releases it without writing anything.
                openPackage.revert();
            }
        }
    }

    /**
     * Reads a txt file and returns its lines joined with {@code '\n'}.
     *
     * @param path    path of the txt file
     * @param charset charset name; {@code null} or empty selects the default (GB2312)
     * @return the file content ({@code ""} for an empty file), or {@code null} if the file could
     *         not be read or the charset name is not supported
     */
    static String importTxt(String path, String charset) {
        String charsetName = StringUtils.isEmpty(charset) ? DEFAULT_CHARSET : charset;
        StringBuilder text = new StringBuilder();
        // The stream is declared as its own resource so it is still closed when the reader
        // cannot be created because the charset name is invalid.
        try (FileInputStream in = new FileInputStream(path);
             BufferedReader br = new BufferedReader(new InputStreamReader(in, charsetName))) {
            String line;
            boolean firstLine = true;
            while ((line = br.readLine()) != null) {
                if (!firstLine) {
                    // readLine() strips the terminator; put a separator back so that the last
                    // word of one line does not fuse with the first word of the next.
                    text.append('\n');
                }
                text.append(line);
                firstLine = false;
            }
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
        return text.toString();
    }

    /**
     * Reads an Excel workbook (.xls or .xlsx) and returns its cell values as text.
     *
     * @param path    path of the workbook; the extension selects the HSSF or XSSF reader
     * @param charset not used by this method; kept so existing callers keep compiling
     * @return the text produced by {@link #getExcelVal(Workbook)}, or {@code ""} if the
     *         extension is neither xls nor xlsx or the workbook could not be read
     */
    static String importExcel(String path, String charset) {
        String type = path.substring(path.lastIndexOf(".") + 1);
        boolean legacyFormat = "xls".equalsIgnoreCase(type);
        if (!legacyFormat && !"xlsx".equalsIgnoreCase(type)) {
            return "";
        }
        // Close the workbook as well as the stream; the workbook holds its own resources.
        try (InputStream is = new FileInputStream(path);
             Workbook wb = legacyFormat ? new HSSFWorkbook(is) : new XSSFWorkbook(is)) {
            return getExcelVal(wb);
        } catch (Exception e) {
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Renders every sheet of a workbook as text: cells of a row are separated by a single
     * space and every row ends with {@code '\n'}.
     *
     * <p>The column count of a sheet is taken from its first row; a sheet whose first row is
     * missing is skipped. Missing rows inside a sheet are skipped without ending the sheet.
     *
     * @param wb the workbook to read
     * @return the concatenated text of all sheets, {@code ""} if there is nothing to read
     */
    public static String getExcelVal(Workbook wb) {
        StringBuilder out = new StringBuilder();
        int sheetNum = wb.getNumberOfSheets();
        for (int m = 0; m < sheetNum; m++) {
            Sheet sheet = wb.getSheetAt(m);
            Row firstRow = sheet.getRow(0);
            if (firstRow == null) {
                continue;
            }
            // getLastCellNum() is "last index + 1", so gaps in the first row are still counted;
            // the physical cell count would drop the trailing columns in that case.
            int colnum = firstRow.getLastCellNum();
            // Iterate up to the last row index rather than the physical row count, otherwise
            // rows located after a blank row would never be reached.
            int lastRow = sheet.getLastRowNum();
            for (int i = 0; i <= lastRow; i++) {
                Row row = sheet.getRow(i);
                if (row == null) {
                    continue;
                }
                for (int j = 0; j < colnum; j++) {
                    out.append(getCellFormatValue(row.getCell(j)));
                    out.append(j < colnum - 1 ? " " : "\n");
                }
            }
        }
        return out.toString();
    }

    /**
     * Converts a cell to a printable value according to its type.
     *
     * <p>Formula cells are read through their cached result type, so a formula that yields
     * text or a boolean is returned as such instead of failing on a numeric read.
     *
     * @param cell the cell to read; may be {@code null}
     * @return a {@code java.util.Date} for date-formatted numeric cells, otherwise a
     *         {@code String}; {@code ""} for {@code null}, blank, error or unknown cells
     */
    @SuppressWarnings("deprecation")
    public static Object getCellFormatValue(Cell cell) {
        if (cell == null) {
            return "";
        }
        // The int-based cell-type API is deprecated, hence the suppression above.
        int type = cell.getCellType();
        if (type == Cell.CELL_TYPE_FORMULA) {
            type = cell.getCachedFormulaResultType();
        }
        switch (type) {
            case Cell.CELL_TYPE_NUMERIC:
                // Dates are stored as numbers; only the cell format tells them apart.
                if (DateUtil.isCellDateFormatted(cell)) {
                    return cell.getDateCellValue();
                }
                return String.valueOf(cell.getNumericCellValue());
            case Cell.CELL_TYPE_STRING:
                return cell.getRichStringCellValue().getString();
            case Cell.CELL_TYPE_BOOLEAN:
                return String.valueOf(cell.getBooleanCellValue());
            default:
                return "";
        }
    }
}
【注】读取 txt 文档时可能出现乱码,请将 charset 参数设置为该 txt 文件实际使用的编码;未传入时代码默认按 GB2312 读取,中文文档常见的编码还有 GBK。