// Supported file-suffix groups (declared as constants in FileOperatorHelper):
//   1. DOC_SUFFIX   = {".doc", ".docx"}
//   2. TXT_SUFFIX   = {".txt", ".csv"}
//   3. EXCEL_SUFFIX = {".xls", ".xlsx"}
//   4. PDF_SUFFIX   = {".pdf"}
//   5. ZIP_SUFFIX   = {".zip"}
import lombok.Getter;
import net.lingala.zip4j.core.ZipFile;
import net.lingala.zip4j.exception.ZipException;
import net.lingala.zip4j.model.ZipParameters;
import net.lingala.zip4j.util.Zip4jConstants;
import org.apache.commons.lang.StringUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.tools.ant.Project;
import org.apache.tools.ant.taskdefs.Expand;
import org.apache.tools.ant.taskdefs.Zip;
import org.apache.tools.ant.types.FileSet;
import org.apache.xmlbeans.XmlException;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
/**
* 文件操作工具类 <BR>
*
*
*/
public class FileOperatorHelper {
/** Suffixes (including the leading dot) routed to the Word reader by {@code open}. */
public static final String[] DOC_SUFFIX = new String[] {".doc", ".docx"};
/** Suffixes (including the leading dot) routed to the plain-text reader by {@code open}. */
public static final String[] TXT_SUFFIX = new String[] {".txt", ".csv"};
/** Suffixes (including the leading dot) routed to the Excel reader by {@code open}. */
public static final String[] EXCEL_SUFFIX = new String[] {".xls", ".xlsx"};
/** Suffixes (including the leading dot) routed to the PDF reader by {@code open}. */
public static final String[] PDF_SUFFIX = new String[] {".pdf"};
/** Suffixes (including the leading dot) recognised as zip archives by {@code isZip}. */
public static final String[] ZIP_SUFFIX = new String[] {".zip"};
// Creation
/**
 * Creates a directory at {@code path}, falling back to a randomly suffixed name when
 * something already exists there.
 *
 * <p>While the candidate path exists, a random integer in {@code [0, 100)} is appended to
 * it and the check is repeated. The returned value is always the exact path handed to
 * {@link File#mkdir()}, so callers never receive the path of a pre-existing entry.
 *
 * <p>The result of {@link File#mkdir()} is not checked; if creation fails (for example
 * because the parent directory is missing) the returned path may not exist.
 *
 * @param path the preferred directory path
 * @return the path of the directory this call tried to create
 */
private static String mkdir(String path){
    String candidate = path;
    File dir = new File(candidate);
    // Keep extending the name until it no longer collides with an existing entry.
    while (dir.exists()) {
        candidate = candidate + (int) (Math.random() * 100);
        dir = new File(candidate);
    }
    dir.mkdir();
    return candidate;
}
// Open
/**
 * Extracts the text of the file at {@code path}, picking the reader from the text that
 * follows the last {@code '.'} in the path.
 *
 * <p>The suffix is compared exactly (no case folding) against {@code DOC_SUFFIX},
 * {@code TXT_SUFFIX}, {@code PDF_SUFFIX} and {@code EXCEL_SUFFIX}, in that order.
 *
 * @param path path of the file to read; must contain a {@code '.'}
 * @return the extracted text, or an empty string when the suffix matches no known group
 * @throws IllegalArgumentException if {@code path} contains no {@code '.'}
 * @throws OpenXML4JException propagated from the Word/Excel readers
 * @throws XmlException propagated from the Word reader
 * @throws IOException propagated from the underlying readers
 */
public static String open(String path) throws OpenXML4JException, XmlException, IOException {
    if (path.indexOf('.') < 0) {
        throw new IllegalArgumentException("请输入一个文件路径");
    }
    final String ext = path.substring(path.lastIndexOf("."));
    // The suffix groups are disjoint, so the first match decides the result.
    if (contains(DOC_SUFFIX, ext)) {
        return openDoc(new File(path));
    }
    if (contains(TXT_SUFFIX, ext)) {
        return openTxt2(path);
    }
    if (contains(PDF_SUFFIX, ext)) {
        return openPdf(new File(path));
    }
    if (contains(EXCEL_SUFFIX, ext)) {
        return openExcel(path);
    }
    return "";
}
/**
 * Tells whether {@code arr} holds an element equal to {@code str}, using
 * {@code StringUtils.equals} for the comparison.
 *
 * @param arr values to search
 * @param str value to look for
 * @return {@code true} if any element equals {@code str}, otherwise {@code false}
 */
private static boolean contains(String[] arr,String str){
    for (int i = 0; i < arr.length; i++) {
        if (StringUtils.equals(arr[i], str)) {
            return true;
        }
    }
    return false;
}
/**
 * Tells whether the text after the last {@code '.'} in {@code path} is one of
 * {@code ZIP_SUFFIX}.
 *
 * @param path path to inspect; must contain a {@code '.'}
 * @return {@code true} when the suffix is a zip suffix
 * @throws IllegalArgumentException if {@code path} contains no {@code '.'}
 */
public static boolean isZip(String path){
    final int dot = path.lastIndexOf(".");
    if (dot < 0) {
        throw new IllegalArgumentException("请输入一个文件路径");
    }
    return contains(ZIP_SUFFIX, path.substring(dot));
}
// File types
// 1. Word document formats
/**
 * Extracts the body text of a Word document.
 *
 * <p>The file is first read with {@link WordExtractor}. If that raises
 * {@link OfficeXmlFileException}, the file is re-opened as an OOXML package and read
 * with {@link XWPFWordExtractor}.
 *
 * @param file the Word document to read
 * @return the extracted text
 * @throws IOException if the file cannot be read or the input stream fails to close
 * @throws OpenXML4JException propagated from opening the OOXML package
 * @throws XmlException propagated from parsing the OOXML document
 */
public static String openDoc(File file) throws IOException, OpenXML4JException, XmlException {
    try (FileInputStream in = new FileInputStream(file)) {
        WordExtractor extractor = new WordExtractor(in);
        return extractor.getText();
    } catch (OfficeXmlFileException e) {
        // The input stream above is already closed here; re-open the file as a package.
        OPCPackage pkg = POIXMLDocument.openPackage(file.getPath());
        try {
            XWPFWordExtractor extractor = new XWPFWordExtractor(pkg);
            return extractor.getText();
        } finally {
            // revert() releases the package without writing anything back to the file.
            pkg.revert();
        }
    }
}
// 2. Plain-text and CSV formats
/**
 * Opens the file at {@code path} for reading, decoding it with
 * {@code SftpServiceImpl.charset}.
 *
 * <p>Failures are printed to standard error and reported by returning {@code null}.
 * The caller owns the returned reader and must close it.
 *
 * @param path path of the file to open
 * @return a buffered reader over the file, or {@code null} if it could not be opened
 */
public static BufferedReader openTxt(String path){
    InputStream in = null;
    try {
        in = new FileInputStream(path);
        return new BufferedReader(new InputStreamReader(in, SftpServiceImpl.charset));
    } catch (IOException e) {
        e.printStackTrace();
        // The stream may already be open when the reader construction fails; release it.
        if (in != null) {
            try {
                in.close();
            } catch (IOException closeError) {
                closeError.printStackTrace();
            }
        }
        return null;
    }
}
/**
 * Reads the whole file at {@code path} into a string, decoding it with
 * {@code SftpServiceImpl.charset}.
 *
 * <p>Lines are concatenated directly: line terminators are dropped and nothing is
 * inserted between consecutive lines.
 *
 * @param path path of the file to read
 * @return the file content with line terminators removed
 * @throws IOException if the file cannot be opened, decoded or read
 */
public static String openTxt2(String path) throws IOException {
    StringBuilder sb = new StringBuilder();
    // Both resources are closed on every exit path, including read failures.
    try (InputStream in = new FileInputStream(path);
         BufferedReader br = new BufferedReader(new InputStreamReader(in, SftpServiceImpl.charset))) {
        String line;
        while ((line = br.readLine()) != null) {
            sb.append(line);
        }
    }
    return sb.toString();
}
// 3. Excel formats
/**
 * Extracts the shared strings of an Excel workbook as one comma-terminated list.
 *
 * <p>The file is first opened as an OLE2 file system and its {@code "Workbook"} stream
 * is fed through the HSSF event API into a {@code ReadExcel2003} listener. If opening
 * raises {@link OfficeXmlFileException}, the file is opened as an OOXML package instead
 * and the entries of its shared strings table are joined, each followed by {@code ","}.
 *
 * @param path path of the workbook
 * @return the collected text; for the OLE2 path this is whatever the listener captured
 * @throws OpenXML4JException propagated from opening or reading the OOXML package
 * @throws IOException if the file cannot be read
 */
public static String openExcel(String path) throws OpenXML4JException, IOException {
    String text;
    POIFSFileSystem fs = null;
    OPCPackage pkg = null;
    try {
        fs = new POIFSFileSystem(new File(path));
        // Register the listener for all records; it keeps only what it needs.
        ReadExcel2003 reader = new ReadExcel2003();
        HSSFRequest req = new HSSFRequest();
        req.addListenerForAllRecords(reader);
        // Close the document stream even when event processing fails.
        try (InputStream din = fs.createDocumentInputStream("Workbook")) {
            HSSFEventFactory factory = new HSSFEventFactory();
            factory.processEvents(req, din);
        }
        text = reader.getContent();
    } catch (OfficeXmlFileException e) {
        pkg = OPCPackage.open(path);
        XSSFReader xssfReader = new XSSFReader(pkg);
        SharedStringsTable sst = xssfReader.getSharedStringsTable();
        StringBuilder sb = new StringBuilder();
        // A workbook without a shared strings part yields no table; treat it as empty.
        if (sst != null) {
            for (CTRst ctRst : sst.getItems()) {
                sb.append(ctRst.getT()).append(",");
            }
        }
        text = sb.toString();
    } finally {
        if (fs != null) {
            fs.close();
        }
        if (pkg != null) {
            // The package was opened with default (read/write) access; revert() closes it
            // without saving, so a read-only operation never rewrites the source file.
            pkg.revert();
        }
    }
    return text;
}
// 4. Zip format
/**
 * Extracts an unencrypted {@code .zip} archive into a newly created sibling directory.
 *
 * <p>The target directory is derived from {@code srcPath} with the {@code .zip} suffix
 * removed and is created through {@code mkdir}, which may pick a suffixed name if that
 * path already exists.
 *
 * @param srcPath path of the archive
 * @return the directory the archive was extracted into, or {@code null} when
 *         {@code srcPath} does not end with {@code ".zip"} (including paths without any
 *         {@code '.'})
 */
public static String unZip(String srcPath){
    // Equivalent to comparing the text after the last '.', but safe for dot-less paths.
    if (!srcPath.endsWith(".zip")) {
        return null;
    }
    String dstDir = mkdir(srcPath.substring(0, srcPath.length() - ".zip".length()));
    Project p = new Project();
    Expand e = new Expand();
    e.setProject(p);
    e.setSrc(new File(srcPath));
    e.setOverwrite(false);
    e.setDest(new File(dstDir));
    // Per the original author: Ant's zip tooling defaults to UTF-8 while WinRAR uses the
    // Windows default GBK/GB2312, so the encoding has to be set explicitly when extracting.
    e.setEncoding(SftpServiceImpl.charset);
    e.execute();
    return dstDir;
}
/**
 * Extracts a {@code .zip} archive, supplying {@code password} when the archive is
 * encrypted, into a newly created sibling directory.
 *
 * <p>The target directory is derived from {@code srcPath} with the {@code .zip} suffix
 * removed and is created through {@code mkdir} just before extraction.
 *
 * @param srcPath path of the archive
 * @param password password used when the archive is encrypted; ignored otherwise
 * @return the directory the archive was extracted into, or {@code null} when
 *         {@code srcPath} does not end with {@code ".zip"} (including paths without any
 *         {@code '.'})
 * @throws ZipException if the archive cannot be read or extracted, or if it is encrypted
 *         and {@code password} is {@code null}
 */
public static String unZip(String srcPath,String password) throws ZipException {
    // Equivalent to comparing the text after the last '.', but safe for dot-less paths.
    if (!srcPath.endsWith(".zip")) {
        return null;
    }
    ZipFile zFile = new ZipFile(new File(srcPath));
    zFile.setFileNameCharset(SftpServiceImpl.charset);
    if (zFile.isEncrypted()) {
        if (password == null) {
            throw new ZipException("password is required to extract encrypted archive " + srcPath);
        }
        zFile.setPassword(password.toCharArray());
    }
    // Create the target only once the archive is known to be readable.
    String dest = mkdir(srcPath.substring(0, srcPath.length() - ".zip".length()));
    zFile.extractAll(dest);
    return dest;
}
// Unencrypted compression
/**
 * Zips a file or directory with Ant's {@code Zip} task, using
 * {@code SftpServiceImpl.charset} as the archive encoding.
 *
 * <p>When {@code dir} is a directory, entries matching the Ant pattern
 * {@code **}{@code /*.error} are excluded. A confirmation line is printed to standard
 * output after the task has run.
 *
 * @param dir file or directory to compress
 * @param zipFilepath path of the archive to write
 * @throws RuntimeException if {@code dir} does not exist
 */
public static void compress(String dir,String zipFilepath){
    final File source = new File(dir);
    if (!source.exists()) {
        throw new RuntimeException("source file or directory " + dir + " does not exist.");
    }
    final Project antProject = new Project();

    final FileSet sources = new FileSet();
    sources.setProject(antProject);
    if (!source.isDirectory()) {
        sources.setFile(source);
    } else {
        sources.setDir(source);
        // Ant include/exclude patterns may be used here.
        sources.setExcludes("**/*.error");
    }

    final Zip zipTask = new Zip();
    zipTask.setProject(antProject);
    zipTask.setDestFile(new File(zipFilepath));
    zipTask.addFileset(sources);
    zipTask.setEncoding(SftpServiceImpl.charset);
    zipTask.execute();
    System.out.println("compress successed.");
}
// Encrypted compression
/**
 * Zips a file or directory with zip4j, encrypting the entries when {@code passwd} is
 * non-empty.
 *
 * <p>For a directory, {@code list(dir)} is called first and every returned file whose
 * name ends in {@code .error} is deleted from disk before the folder is added. Note that
 * {@code list}, as defined in this class, also extracts nested {@code .zip} files and
 * deletes the archives, so this method modifies the source directory. A single file is
 * added only if its path does not end in {@code .error}.
 *
 * @param dir file or directory to compress
 * @param zipFilepath path of the archive to write
 * @param passwd archive password; {@code null} or empty disables encryption
 * @throws ZipException propagated from zip4j
 */
public static void compress(String dir,String zipFilepath,String passwd) throws ZipException {
    final File source = new File(dir);
    final ZipFile archive = new ZipFile(zipFilepath);

    final ZipParameters params = new ZipParameters();
    params.setCompressionMethod(Zip4jConstants.COMP_DEFLATE);           // compression method
    params.setCompressionLevel(Zip4jConstants.DEFLATE_LEVEL_NORMAL);    // compression level
    final boolean encrypt = !StringUtils.isEmpty(passwd);
    if (encrypt) {
        params.setEncryptFiles(true);
        params.setEncryptionMethod(Zip4jConstants.ENC_METHOD_STANDARD); // encryption method
        params.setPassword(passwd.toCharArray());
    }

    if (!source.isDirectory()) {
        if (!dir.matches("^.*\\.error$")) {
            archive.addFile(source, params);
        }
        return;
    }

    // Remove unwanted files from the folder before it is archived.
    for (File candidate : list(dir)) {
        if (candidate.getName().matches("^.*\\.error$")) {
            delete(candidate.getPath());
        }
    }
    archive.addFolder(source, params);
}
// 5. PDF format
/**
 * Extracts the text of a PDF file with {@link PDFTextStripper}.
 *
 * @param file the PDF file to read
 * @return the text produced by the stripper
 * @throws IOException if the document cannot be loaded or its text cannot be extracted
 */
private static String openPdf(File file) throws IOException {
    // The document is closed even when text extraction throws.
    try (PDDocument doc = PDDocument.load(file)) {
        PDFTextStripper pdfTextStripper = new PDFTextStripper();
        return pdfTextStripper.getText(doc);
    }
}
// 输出格式
// IO流、byte[]、String
/**
 * Deletes the file or directory at {@code path}; directories are emptied recursively
 * before being removed. Does nothing when the path does not exist.
 *
 * <p>The results of the individual {@link File#delete()} calls are not checked.
 *
 * @param path file or directory to delete
 */
public static void delete(String path){
    final File target = new File(path);
    if (!target.exists()) {
        return;
    }
    final File[] children = target.listFiles();
    // Only descend when this is a directory whose entries could be listed.
    if (target.isDirectory() && children != null) {
        for (File child : children) {
            delete(child.getPath());
        }
    }
    target.delete();
}
/**
* @Description: http 设置content-Type参数
* @Time 2019/7/16 11:15
*/
/*public static String getMime(String path){
String expansion = path.substring(path.lastIndexOf(".")+1);
return Mime.getMime(expansion);
}*/
/**
 * Tells whether a file or directory exists at {@code path}.
 *
 * @param path path to check
 * @return the result of {@link File#exists()} for that path
 */
public static boolean exists(String path){
    return new File(path).exists();
}
/**
 * Recursively collects the regular files under {@code path}, expanding zip archives on
 * the way.
 *
 * <p>Directories are walked depth-first. A file whose path ends in {@code .zip} is
 * extracted with {@code unZip(String)}; the extracted directory is listed in its place
 * and the archive itself is then deleted. Any other path is returned as-is, including a
 * path that does not exist. A directory whose entries cannot be listed contributes
 * nothing.
 *
 * @param path file or directory to list
 * @return the files found; never {@code null}
 */
public static List<File> list(String path){
    List<File> files = new ArrayList<>();
    File file = new File(path);
    if (file.isDirectory()) {
        File[] children = file.listFiles();
        // listFiles() returns null when the directory cannot be read.
        if (children != null) {
            for (File child : children) {
                files.addAll(list(child.getPath()));
            }
        }
        return files;
    }
    // Only hand real .zip paths to unZip; this also keeps dot-less names from failing there.
    String extractedDir = file.getPath().endsWith(".zip") ? unZip(file.getPath()) : null;
    if (extractedDir == null) {
        files.add(file);
    } else {
        files.addAll(list(extractedDir));
        delete(file.getPath());
    }
    return files;
}
/**
 * Manual smoke run: compresses a fixed desktop folder into a password-protected archive
 * and prints a completion line.
 *
 * @param args ignored
 * @throws ZipException propagated from the encrypted {@code compress} overload
 */
public static void main(String[] args) throws ZipException {
    final String sourceDir = "C:\\Users\\Lenovo\\Desktop\\数据存储层";
    final String targetZip = "C:\\Users\\Lenovo\\Desktop\\controller.zip";
    final String password = "123456";
    compress(sourceDir, targetZip, password);
    System.out.println("compress complete");
}
/**
 * Opens the file at {@code path} for writing with the {@code "gbk"} charset.
 *
 * <p>Failures are printed to standard error and reported by returning {@code null}.
 * The caller owns the returned writer and must close it.
 *
 * @param path path of the file to write (created or truncated by the output stream)
 * @return a buffered writer over the file, or {@code null} if it could not be opened
 */
public static BufferedWriter write(String path){
    OutputStream out = null;
    try {
        out = new FileOutputStream(path);
        return new BufferedWriter(new OutputStreamWriter(out, "gbk"));
    } catch (IOException e) {
        e.printStackTrace();
        // The stream may already be open when the writer construction fails; release it.
        if (out != null) {
            try {
                out.close();
            } catch (IOException closeError) {
                closeError.printStackTrace();
            }
        }
        return null;
    }
}
}
/**
 * HSSF event listener that captures the strings of a workbook's SST record as one
 * comma-terminated list, exposed through the generated {@code getContent()} getter.
 */
class ReadExcel2003 implements HSSFListener{
    /** Text built from the most recently seen SST record; {@code null} until one arrives. */
    @Getter
    private String content;

    /**
     * Handles one record from the event stream; everything except SST records is ignored.
     *
     * @param record the record delivered by the event factory
     */
    @Override
    public void processRecord(Record record) {
        if (record.getSid() != SSTRecord.sid) {
            return;
        }
        content = toText((SSTRecord) record);
    }

    /**
     * Joins the unique strings of {@code sstRecord}, each followed by {@code ","}.
     *
     * @param sstRecord the shared string table record
     * @return the joined text
     */
    private String toText(SSTRecord sstRecord){
        final StringBuilder joined = new StringBuilder();
        int index = 0;
        while (index < sstRecord.getNumUniqueStrings()) {
            joined.append(sstRecord.getString(index)).append(",");
            index++;
        }
        return joined.toString();
    }
}