使用 Apache POI 可以解析 Office 文件,这里列举了解析 .doc、.docx、.xls、.xlsx 文件的方法;.txt 文件则直接用 JDK 自带的 Reader 读取,不依赖 POI
首先要引入 POI 的 jar 包(poi、poi-ooxml、poi-scratchpad 及其依赖),可以从 Apache POI 官网或 Maven 中央仓库获取
package com.yj;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
/**
 * Demo that lists a directory and prints the text content of every regular
 * file in it, picking the parser from the file extension:
 * .doc, .docx, .xls and .xlsx go through Apache POI, .txt through a plain reader.
 * Files with any other extension only have their name and path printed.
 *
 * Date: 2018.7.25
 *
 * @author lalala
 */
public class poiTest {

    /** Directory scanned when no command-line argument is supplied. */
    private static final String DEFAULT_DIRECTORY = "D:\\ali";

    /**
     * Scans a directory and prints each file's name, path and extracted content.
     *
     * @param args optional; {@code args[0]} overrides the directory to scan,
     *             otherwise {@link #DEFAULT_DIRECTORY} is used
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_DIRECTORY;
        File directory = new File(path);

        // File.list() returns null when the path is not a directory or cannot be read.
        String[] fileNames = directory.list();
        if (fileNames == null) {
            System.err.println("Not a readable directory: " + directory.getPath());
            return;
        }

        for (String fileName : fileNames) {
            File file = new File(directory, fileName);
            // Sub-directories cannot be opened as a stream; skip them instead of
            // letting one failure end the whole scan.
            if (!file.isFile()) {
                continue;
            }
            System.out.println(fileName);
            System.out.println(file.getPath());
            try {
                printContent(file, extensionOf(fileName));
            } catch (Exception e) {
                // Boundary catch: a corrupt or unsupported file must not stop the remaining ones.
                System.err.println("Failed to parse " + file.getPath());
                e.printStackTrace();
            }
        }
    }

    /**
     * Returns the text after the last '.' of a file name, or an empty string
     * when the name contains no dot.
     */
    private static String extensionOf(String fileName) {
        int dot = fileName.lastIndexOf('.');
        return dot < 0 ? "" : fileName.substring(dot + 1);
    }

    /**
     * Dispatches to the parser matching the extension (compared case-insensitively).
     * Unknown extensions are ignored.
     *
     * @throws IOException if the file cannot be opened or read
     */
    private static void printContent(File file, String extension) throws IOException {
        if ("doc".equalsIgnoreCase(extension)) {
            printDoc(file);
        } else if ("docx".equalsIgnoreCase(extension)) {
            printDocx(file);
        } else if ("xls".equalsIgnoreCase(extension)) {
            printXls(file);
        } else if ("xlsx".equalsIgnoreCase(extension)) {
            printXlsx(file);
        } else if ("txt".equalsIgnoreCase(extension)) {
            printTxt(file);
        }
    }

    /** Prints all paragraphs of a binary Word (.doc) file, concatenated, once. */
    private static void printDoc(File file) throws IOException {
        try (FileInputStream in = new FileInputStream(file)) {
            WordExtractor extractor = new WordExtractor(in);
            StringBuilder text = new StringBuilder();
            for (String paragraph : extractor.getParagraphText()) {
                text.append(paragraph);
            }
            System.out.println(text);
        }
    }

    /** Prints the trimmed text of an OOXML Word (.docx) file. */
    private static void printDocx(File file) throws IOException {
        // Read from a stream that is closed here rather than opening a package by path,
        // so no handle on the file is left open after parsing.
        try (FileInputStream in = new FileInputStream(file)) {
            POIXMLTextExtractor extractor = new XWPFWordExtractor(new XWPFDocument(in));
            System.out.println(extractor.getText().trim());
        }
    }

    /**
     * Prints every sheet of a binary Excel (.xls) file: one line per row, cells
     * separated by tabs, and an empty line after each sheet.
     */
    private static void printXls(File file) throws IOException {
        try (FileInputStream in = new FileInputStream(file)) {
            HSSFWorkbook workbook = new HSSFWorkbook(in);
            for (int sheetIndex = 0; sheetIndex < workbook.getNumberOfSheets(); sheetIndex++) {
                HSSFSheet sheet = workbook.getSheetAt(sheetIndex);
                if (sheet == null) {
                    continue;
                }
                for (int rowIndex = 0; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
                    HSSFRow row = sheet.getRow(rowIndex);
                    if (row == null) {
                        continue;
                    }
                    for (int cellIndex = 0; cellIndex < row.getLastCellNum(); cellIndex++) {
                        HSSFCell cell = row.getCell(cellIndex);
                        if (cell == null) {
                            continue;
                        }
                        System.out.print(getValue(cell) + "\t");
                    }
                    System.out.println();
                }
                System.out.println();
            }
        }
    }

    /**
     * Prints every sheet of an OOXML Excel (.xlsx) file: one line per row, cells
     * separated by tabs, and an empty line after each sheet.
     */
    private static void printXlsx(File file) throws IOException {
        try (FileInputStream in = new FileInputStream(file)) {
            XSSFWorkbook workbook = new XSSFWorkbook(in);
            for (int sheetIndex = 0; sheetIndex < workbook.getNumberOfSheets(); sheetIndex++) {
                XSSFSheet sheet = workbook.getSheetAt(sheetIndex);
                if (sheet == null) {
                    continue;
                }
                for (int rowIndex = 0; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
                    XSSFRow row = sheet.getRow(rowIndex);
                    if (row == null) {
                        continue;
                    }
                    for (int cellIndex = 0; cellIndex < row.getLastCellNum(); cellIndex++) {
                        XSSFCell cell = row.getCell(cellIndex);
                        if (cell == null) {
                            continue;
                        }
                        System.out.print(getValue2(cell) + "\t");
                    }
                    System.out.println();
                }
                System.out.println();
            }
        }
    }

    /** Prints a text file line by line. */
    private static void printTxt(File file) throws IOException {
        // FileReader is kept from the original code; it does not take an explicit charset.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            // readLine() returning null is the end-of-file signal; ready() is not.
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }

    /**
     * Renders an .xls cell as text using the POI 3.x int cell-type constants.
     * Boolean and numeric cells are converted with String.valueOf, string cells
     * return their text, and every other type (blank, formula, error) falls back
     * to the cell's own toString() so that getStringCellValue() is never called
     * on a cell type it rejects.
     */
    @SuppressWarnings("deprecation")
    private static String getValue(HSSFCell cell) {
        int type = cell.getCellType();
        if (type == HSSFCell.CELL_TYPE_BOOLEAN) {
            return String.valueOf(cell.getBooleanCellValue());
        }
        if (type == HSSFCell.CELL_TYPE_NUMERIC) {
            return String.valueOf(cell.getNumericCellValue());
        }
        if (type == HSSFCell.CELL_TYPE_STRING) {
            return String.valueOf(cell.getStringCellValue());
        }
        return cell.toString();
    }

    /**
     * Renders an .xlsx cell as text; same rules as {@link #getValue(HSSFCell)}.
     */
    @SuppressWarnings("deprecation")
    private static String getValue2(XSSFCell cell) {
        int type = cell.getCellType();
        if (type == XSSFCell.CELL_TYPE_BOOLEAN) {
            return String.valueOf(cell.getBooleanCellValue());
        }
        if (type == XSSFCell.CELL_TYPE_NUMERIC) {
            return String.valueOf(cell.getNumericCellValue());
        }
        if (type == XSSFCell.CELL_TYPE_STRING) {
            return String.valueOf(cell.getStringCellValue());
        }
        return cell.toString();
    }
}
结果:
111.txt
D:\ali\111.txt
你好
123.doc
D:\ali\123.doc
哈哈哈哈
123.docx
D:\ali\123.docx
哈哈哈哈哈哈
123.txt
D:\ali\123.txt
hello
222.txt
D:\ali\222.txt
略略略
234.txt
D:\ali\234.txt
bulingbuling
hello.xlsx
D:\ali\hello.xlsx
学号 姓名 班级
1.0 1.0 1.0
helo.xls
D:\ali\helo.xls
1.0 2.0 3.0
Test.xls
D:\ali\Test.xls
姓名 班级 学分
1.0 1.0 1.0
2.0 2.0 2.0
3.0 3.0 3.0
车型 价格
bmw 1000000.0
qq 10000.0