package com.mswheat.filecontent;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
/**
 * Static helpers that extract the plain-text body of a document file using
 * Apache Tika's auto-detecting parser.
 */
public class FileContent {
    /**
     * Lower-case file extensions (leading dot included) accepted by
     * {@link #support(File)}.
     *
     * <p>Kept public and non-final so that existing callers which read or
     * reassign this field continue to compile.
     */
    public static List<String> SupportedFormat = Arrays.asList(".pdf", ".doc", ".docx", ".txt", ".xls", ".xlsx");

    /** Creates an instance; every method of this class is static, so none is required. */
    public FileContent() {
    }

    /**
     * Tells whether the file's extension is listed in {@link #SupportedFormat}.
     * The comparison is case-insensitive and looks only at the file name, not
     * at the file's content.
     *
     * @param file the file to test; may be {@code null}
     * @return {@code true} if the name ends with a supported extension;
     *         {@code false} for {@code null} or for a name without any dot
     */
    public static boolean support(File file) {
        if (file == null) {
            return false;
        }
        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
        String name = file.getName().toLowerCase(Locale.ROOT);
        int dot = name.lastIndexOf('.');
        if (dot < 0) {
            // No extension: substring(-1) would throw, so answer directly.
            return false;
        }
        return SupportedFormat.contains(name.substring(dot));
    }

    /**
     * Extracts the text body of {@code file} with Tika's {@link AutoDetectParser}.
     *
     * <p>Extraction is best-effort: any exception raised while opening or
     * parsing the file is printed to standard error and whatever text was
     * collected up to that point (possibly the empty string) is returned.
     *
     * <p>NOTE(review): per the Tika documentation the no-argument
     * {@link BodyContentHandler} bounds its internal buffer (100k characters)
     * and signals an error once the limit is hit; that error is caught below,
     * so long documents come back truncated. Confirm whether that is intended.
     *
     * @param file the document to read; may be {@code null}
     * @return the extracted text, or {@code ""} when {@code file} is
     *         {@code null} or does not exist
     * @throws Exception declared for compatibility with existing callers;
     *         failures during parsing are caught and logged, not propagated
     */
    public static String getContent(File file) throws Exception {
        if (file == null || !file.exists()) {
            System.out.println("File Not Exists");
            return "";
        }

        // Created outside the try block so it is never null when read after a failure.
        BodyContentHandler handler = new BodyContentHandler();
        // try-with-resources guarantees the file handle is released on every path.
        try (InputStream input = new FileInputStream(file)) {
            Parser parser = new AutoDetectParser();
            parser.parse(input, handler, new Metadata(), new ParseContext());
        } catch (Exception e) {
            e.printStackTrace();
        }
        return handler.toString();
    }

    /**
     * Manual smoke test: prints the extracted text of one sample file per
     * supported format from a fixed local directory.
     *
     * @param args ignored
     * @throws Exception if {@link #getContent(File)} propagates one
     */
    public static void main(String[] args) throws Exception {
        String sampleDir = "C:\\temp\\FileContentTest";
        String[] extensions = {"pdf", "doc", "docx", "txt", "xls", "xlsx"};
        for (String ext : extensions) {
            System.out.println("---" + ext.toUpperCase(Locale.ROOT));
            System.out.println(getContent(new File(sampleDir, "test." + ext)));
        }
    }
}
// Reading the content of a file in Java (without garbled characters)
// Latest recommended article published on 2024-04-15 05:12:44