java文件格式_Java读取各种文件格式内容

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.NumberFormat;

import org.apache.commons.io.FileUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;

/** File content reading converter. */

public classReadFileConverter

{public String getContents(String path) throwsException

{

String contents= "";int index = path.lastIndexOf(".");

String file_suffix= path.substring(index+1).toLowerCase();if(file_suffix.equalsIgnoreCase("txt")||file_suffix.equalsIgnoreCase("log")){

contents= this.readTXT(path);

}else if(file_suffix.equalsIgnoreCase("xls")){

contents= this.readXLS(path);

}else if(file_suffix.equalsIgnoreCase("xlsx")){

contents= this.readXLSX(path);

}else if(file_suffix.equalsIgnoreCase("doc")){

contents= this.readDOC(path);

}else if(file_suffix.equalsIgnoreCase("docx")){

contents= this.readDOCX(path);

}else if(file_suffix.equalsIgnoreCase("pdf")){

contents= this.readPDF(path);

}returncontents;

}public String readXLS(String file) throwsException

{

StringBuilder content= newStringBuilder();

HSSFWorkbook workbook= new HSSFWorkbook(newFileInputStream(file));try{for(int numSheets = 0; numSheets < workbook.getNumberOfSheets(); numSheets++){if (null !=workbook.getSheetAt(numSheets)){

HSSFSheet aSheet= workbook.getSheetAt(numSheets);//获得一个sheet

for(int rowNumOfSheet = 0; rowNumOfSheet <=aSheet.getLastRowNum(); rowNumOfSheet++){if (null !=aSheet.getRow(rowNumOfSheet)){

HSSFRow aRow= aSheet.getRow(rowNumOfSheet); //获得一个行

for(short cellNumOfRow = 0; cellNumOfRow <=aRow.getLastCellNum(); cellNumOfRow++){if (null !=aRow.getCell(cellNumOfRow)){

HSSFCell aCell= aRow.getCell(cellNumOfRow);//获得列值

if (this.convertCell(aCell).length() > 0){

content.append(this.convertCell(aCell));

}

}

content.append("\n");

}

}

}

}

}

}catch(Exception e){

content.append("xls文件格式不对或损坏");

}finally{if(workbook!=null){

workbook.close();

}

}returncontent.toString();

}public String readXLSX(String file) throwsException

{

StringBuilder content= newStringBuilder();

XSSFWorkbook workbook= newXSSFWorkbook(file);try{for(int numSheets = 0; numSheets

XSSFSheet aSheet= workbook.getSheetAt(numSheets);//获得一个sheet

for(int rowNumOfSheet = 0; rowNumOfSheet <=aSheet.getLastRowNum(); rowNumOfSheet++){if (null !=aSheet.getRow(rowNumOfSheet)){

XSSFRow aRow= aSheet.getRow(rowNumOfSheet); //获得一个行

for(short cellNumOfRow = 0; cellNumOfRow <=aRow.getLastCellNum(); cellNumOfRow++){if (null != aRow.getCell(cellNumOfRow)){

XSSFCell aCell = aRow.getCell(cellNumOfRow);// 获得列值

if (this.convertCell(aCell).length() > 0){

content.append(this.convertCell(aCell));

}

}

content.append("\n");

}

}

}

}

}

}catch(Exception e){

content.append("xlsx文件格式不对或损坏");

}

finally{

if(workbook!=null){

workbook.close();

}

}

return content.toString();

}

public String readTXT(String file) throws Exception

{

String contents = "";

try{

String encoding = this.get_charset(new File(file));

if (encoding.equalsIgnoreCase("GBK")) {

contents = FileUtils.readFileToString(new File(file), "gbk");

} else {

contents = FileUtils.readFileToString(new File(file), "utf8");

}

}catch(Exception e){

contents = "txt文件格式不对或损坏";

}

return contents;

}

public String readDOC(String file) throws Exception

{

String returnStr;

WordExtractor wordExtractor = new WordExtractor(new FileInputStream(new File(file)));

try{

returnStr = wordExtractor.getText();

}catch(Exception e){

returnStr="doc文件格式不对或损坏";

}

finally{

if(wordExtractor != null){

wordExtractor.close();

}

}

return returnStr;

}

public String readDOCX(String file) throws Exception

{

String docx;

XWPFWordExtractor xwp= new XWPFWordExtractor(POIXMLDocument.openPackage(file));

try{

docx= xwp.getText();

}catch(Exception e){

docx="docx文件格式不对或损坏";

}

finally{

if(xwp !=null){

xwp.close();

}

}

return docx;

}

public String readPDF(String file) throws Exception

{

String result = null;

FileInputStream is = null;

PDDocument document = null;

try{

is = new FileInputStream(file);

document = PDDocument.load(is);

PDFTextStripper stripper = new PDFTextStripper();

result = stripper.getText(document);

}catch(Exception e){

result="pdf文件格式不对或损坏";

}

finally{

if (is != null){

is.close();

}

if (document != null){

document.close();

}

}

return result;

}

private String get_charset(File file) throws IOException

{

String charset = "GBK";

byte[] first3Bytes = new byte[3];

BufferedInputStream bis = null;

try {

boolean checked = false;

bis = new BufferedInputStream(new FileInputStream(file));

bis.mark(0);

int read = bis.read(first3Bytes, 0, 3);

if (read == -1)

return charset;

if (first3Bytes[0] == (byte) 0xFF && first3Bytes[1] == (byte) 0xFE) {

charset = "UTF-16LE";

checked = true;

} else if (first3Bytes[0] == (byte) 0xFE&& first3Bytes[1] == (byte) 0xFF) {

charset = "UTF-16BE";

checked = true;

} else if (first3Bytes[0] == (byte) 0xEF&& first3Bytes[1] == (byte) 0xBB&& first3Bytes[2] == (byte) 0xBF) {

charset = "UTF-8";

checked = true;

}

bis.reset();

if (!checked) {

// int len = 0;

int loc = 0;

while ((read = bis.read()) != -1) {

loc=loc+1;

if (read >= 0xF0)

break;

if (0x80 <= read && read <= 0xBF) // 单独出现BF以下的,也算是GBK

break;

if (0xC0 <= read && read <= 0xDF) {

read = bis.read();

if (0x80 <= read && read <= 0xBF) // 双字节 (0xC0 - 0xDF)

// (0x80

// - 0xBF),也可能在GB编码内

continue;

else

break;

} else if (0xE0 <= read && read <= 0xEF) {// 也有可能出错,但是几率较小

read = bis.read();

if (0x80 <= read && read <= 0xBF) {

read = bis.read();

if (0x80 <= read && read <= 0xBF) {

charset = "UTF-8";

break;

} else

break;

} else

break;

}

}

}

} catch (Exception e) {

e.printStackTrace();

} finally {

if (bis != null) {

bis.close();

}

}

return charset;

}

@SuppressWarnings("deprecation")

private String convertCell(Cell cell)

{

NumberFormat formater = NumberFormat.getInstance();

formater.setGroupingUsed(false);

String cellValue = "";

if (cell == null) {

return cellValue;

}

switch (cell.getCellTypeEnum()) {

case NUMERIC:

cellValue = formater.format(cell.getNumericCellValue());

break;

case STRING:

cellValue = cell.getStringCellValue();

break;

case BLANK:

cellValue = cell.getStringCellValue();

break;

case BOOLEAN:

cellValue = Boolean.valueOf(cell.getBooleanCellValue()).toString();

break;

case ERROR:

cellValue = String.valueOf(cell.getErrorCellValue());

break;

default:

cellValue = "";

}

return cellValue.trim();

}

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值