Java 中的文件读取信息


import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.log4j.Logger;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.pdfbox.encryption.DecryptDocument;
import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;
import org.textmining.text.extraction.WordExtractor;


public class FileReader
{
/**
* Description:解析文件,返回文档内容 <br>
*
*
* @param logger
* Longger
* @param realPath
* String
* @return String
*/

public static String readOneFile(Logger logger, String realPath)
{
FileInputStream in = null;
BufferedReader reader = null;

String content = "";
try
{
File file = new File(realPath);

if (!file.canRead())
{
logger.error(MessageCode.getPageMessage("iiss.info.common.filenotread") + realPath);
return "";
}

String suffix = realPath.substring(realPath.lastIndexOf(".") + 1, realPath.length());

if ("doc".equalsIgnoreCase(suffix))
{
WordExtractor extractor = new WordExtractor();
content = extractor.extractText(new FileInputStream(file));

if (logger.isDebugEnabled())
{
logger.debug("summary=" + content);
}
}//解析excel文件
else if ("xls".equalsIgnoreCase(suffix))
{
StringBuffer rowData = new StringBuffer(KeyConstant.INITIAL_BUFFER);

in = new FileInputStream(file);
// 创建对Excel对象
HSSFWorkbook workbook = new HSSFWorkbook(in);
//获得excel的页数
int sheetNo = workbook.getNumberOfSheets();

if (sheetNo > 0)
{
for (int i = 0; i < sheetNo; i++)
{
// 获取每一页对象
HSSFSheet sheet = workbook.getSheetAt(i);

if (sheet != null)
{
//逐行获得内容
HSSFRow row = null;
for (int j = 0; j <= sheet.getLastRowNum(); j++)
{
try
{
row = sheet.getRow(j);
}
catch (Exception e)
{
row = null;
}

if (row == null)
{
continue;
}

//逐个单元格获得内容
HSSFCell cell = null;
String fieldValue = null;
for (int k = 0; k <= row.getLastCellNum() - 1; k++)
{
try
{
cell = row.getCell((short)k);
}
catch (Exception e)
{
cell = row.createCell((short)k);
cell.setCellType(HSSFCell.CELL_TYPE_STRING);
cell.setCellValue("");
}

fieldValue = POITools.getCellValue(cell);

if (fieldValue != null && !"".equals(fieldValue))
{
rowData.append(fieldValue);
rowData.append("");
}

}

}
}

}
}

content = rowData.toString();

if (logger.isDebugEnabled())
{
logger.debug("summary=" + content);
}
}
else if ("txt".equalsIgnoreCase(suffix))
{
in = new FileInputStream(file);
reader = new BufferedReader(new InputStreamReader(in));

StringBuffer sBuffer = new StringBuffer();
String s = null;
do
{
s = reader.readLine();
if (s != null)
{
sBuffer.append(s);
}
} while (s != null);

content = sBuffer.toString();

if (logger.isDebugEnabled())
{
logger.debug("summary=" + content);
}
}
else if ("html".equalsIgnoreCase(suffix) || "htm".equalsIgnoreCase(suffix))
{
HTMLParser parser = new HTMLParser(file);
content = parser.getContent();
}
else if ("pdf".equalsIgnoreCase(suffix))
{
PDDocument pdf = null;
try
{
PDFParser parser = new PDFParser(new FileInputStream(file));
parser.parse();
pdf = parser.getPDDocument();
if (pdf.isEncrypted())
{
DecryptDocument decryptor = new DecryptDocument(pdf);
decryptor.decryptDocument("");
}

PDFTextStripper stripper = new PDFTextStripper();
content = stripper.getText(pdf);
if (logger.isDebugEnabled())
{
logger.debug("summary=" + content);
}
}
catch (Exception e)
{
logger.error(e, e);
}
catch (OutOfMemoryError t)
{
logger.error(t, t);
}
finally
{
try
{
if (pdf != null)
{
pdf.close();
}

}
catch (IOException e)
{
logger.error(MessageCode.getPageMessage("iiss.info.common.readfilefail") + realPath);
}
}
}
else
{
content = " ";
}
}
catch (FileNotFoundException e)
{
logger.error(MessageCode.getPageMessage("iiss.info.common.filenotfound") + realPath);
}
catch (IOException e)
{
logger.error(MessageCode.getPageMessage("iiss.info.common.readfilefail") + realPath);
}
catch (InterruptedException e)
{
logger.error(MessageCode.getPageMessage("iiss.info.common.readhtmlfilefail") + realPath);
}
catch (Exception e)
{
logger.error(MessageCode.getPageMessage("iiss.info.common.parsefilefail") + e, e);
}
finally
{
try
{
if (in != null)
{
in.close();
}

}
catch (IOException e)
{
logger.error(e, e);
}

try
{
if (reader != null)
{
reader.close();
}

}
catch (IOException e)
{
logger.error(e, e);
}
}
return content;
}

}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值