将文件(txt,doc,docx,xlsx,xls,pdf)内容转化为base64编码;读取文件的内容;
运行结果
1.添加maven依赖 版本自行更改
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>1.8.8</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>5.0.6</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.16</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-examples</artifactId>
<version>3.16</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-excelant</artifactId>
<version>3.16</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.16</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.16</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.16</version>
</dependency>
2.在domain创建文件实体类fileDTO
/**
 * Simple mutable data holder for a file: an id, a name, a type and the
 * file content carried as a string.
 */
public class fileDTO {

    private String id;
    private String name;
    private String type;
    private String content;

    /** Creates an instance whose fields are all {@code null}. */
    public fileDTO() {
    }

    /**
     * Creates an instance with every field populated.
     *
     * @param id      identifier of the file
     * @param name    file name
     * @param type    file type
     * @param content file content as a string
     */
    public fileDTO(String id, String name, String type, String content) {
        this.id = id;
        this.name = name;
        this.type = type;
        this.content = content;
    }

    /** @return the identifier, possibly {@code null} */
    public String getId() {
        return this.id;
    }

    /** @param id the identifier to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the file name, possibly {@code null} */
    public String getName() {
        return this.name;
    }

    /** @param name the file name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the file type, possibly {@code null} */
    public String getType() {
        return this.type;
    }

    /** @param type the file type to store */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the content string, possibly {@code null} */
    public String getContent() {
        return this.content;
    }

    /** @param content the content string to store */
    public void setContent(String content) {
        this.content = content;
    }

    /**
     * Renders all four fields on one line; {@code null} fields appear as the
     * text {@code null}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append("[ id: ").append(id);
        text.append(" name: ").append(name);
        text.append(" type: ").append(type);
        text.append(" content: ").append(content);
        text.append("]");
        return text.toString();
    }
}
3.根据文件类型读取其文件内容,并将内容转化为base64编码
/**
 * Reads the sample file at the hard-coded path, extracts its text with the
 * reader matching the file extension, Base64-encodes the extracted bytes and
 * stores the result in a {@code fileDTO}. The file name, the detected type
 * and the Base64 string are printed to standard output.
 *
 * <p>The extension is matched case-insensitively, so {@code "PDF"} and
 * {@code "pdf"} select the same reader. The DTO still stores the extension
 * exactly as it appears in the file name.
 *
 * @throws IOException              if the txt reader fails to read the file
 * @throws IllegalArgumentException if the extension is not one of
 *                                  docx, txt, doc, xlsx, xls or pdf
 */
void readFile() throws IOException {
    // Path of the file to convert
    File file = new File("D:/test/测试.pdf");
    fileDTO filedto = new fileDTO();
    filedto.setName(file.getName());
    System.out.println(filedto.getName());
    // Everything after the last '.'; a name without '.' yields the whole name
    filedto.setType(file.getName().substring(file.getName().lastIndexOf(".") + 1));

    // Dispatch on the lower-cased extension so the match ignores case
    String type = filedto.getType().toLowerCase(java.util.Locale.ROOT);
    byte[] bytes;
    switch (type) {
        case "docx":
            System.out.println("docx");
            bytes = getdocxContent(file.getPath());
            break;
        case "txt":
            System.out.println("txt");
            bytes = gettxtContent(file);
            break;
        case "doc":
            System.out.println("doc");
            bytes = getdocContent(file.getPath());
            break;
        case "xlsx":
            System.out.println("xlsx");
            bytes = getxlsxContent(file.getPath());
            break;
        case "xls":
            System.out.println("xls");
            bytes = getxlsContent(file.getPath());
            break;
        case "pdf":
            System.out.println("pdf");
            bytes = getpdfContent(file.getPath());
            break;
        default:
            // Fail with a clear message instead of a NullPointerException
            // from the encoder when no reader produced any bytes.
            throw new IllegalArgumentException(
                    "Unsupported file type: " + filedto.getType());
    }

    String base64 = Base64.getEncoder().encodeToString(bytes);
    filedto.setContent(base64);
    System.out.println(filedto.getContent());
}
4.若只需读取内容、不转base64,使用以下写法即可(以gettxtContent为例)
/**
 * Reads a whole text file and returns its content decoded as UTF-8.
 *
 * @param filePath the file to read
 * @return the file content as a string
 * @throws IOException if the file cannot be read
 */
String gettxtContent(File filePath) throws IOException {
    String location = String.valueOf(filePath);
    return new String(Files.readAllBytes(Paths.get(location)), StandardCharsets.UTF_8);
}
读取文件内容并以byte[]类型返回(base64编码由上文的readFile统一完成)
/**
 * Reads a text file and returns its content as UTF-8 bytes.
 *
 * <p>The raw bytes are decoded as UTF-8 and encoded again as UTF-8, so the
 * result no longer depends on the platform default charset. Byte sequences
 * that are not valid UTF-8 are replaced by the decoder's replacement
 * character during this round trip.
 *
 * @param filePath the file to read
 * @return the file content encoded as UTF-8
 * @throws IOException if the file cannot be read
 */
byte[] gettxtContent(File filePath) throws IOException {
    byte[] raw = Files.readAllBytes(Paths.get(String.valueOf(filePath)));
    String content = new String(raw, StandardCharsets.UTF_8);
    // Encode with the same charset used for decoding, not the platform default
    return content.getBytes(StandardCharsets.UTF_8);
}
/**
 * Extracts the text of a .docx file and returns it as UTF-8 bytes.
 *
 * <p>An {@link IOException} raised while opening or reading the document is
 * printed via {@code printStackTrace()} and not propagated; in that case the
 * returned array is empty. The extracted text is also printed to standard
 * output.
 *
 * @param path path of the .docx file
 * @return the extracted text encoded as UTF-8, or an empty array on failure
 */
byte[] getdocxContent(String path) {
    List<String> docxList = new ArrayList<String>();
    OPCPackage opcPackage = null;
    try {
        opcPackage = POIXMLDocument.openPackage(path);
        XWPFDocument xwpf = new XWPFDocument(opcPackage);
        POIXMLTextExtractor poiText = new XWPFWordExtractor(xwpf);
        docxList.add(poiText.getText());
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (opcPackage != null) {
            // Release the file without writing back: revert() discards the
            // package, whereas close() on a writable package saves it to the
            // source file, which a read-only extraction must not do.
            opcPackage.revert();
        }
    }
    System.out.println(docxList);
    // Explicit charset so the bytes do not depend on the platform default
    return String.join(",", docxList).getBytes(StandardCharsets.UTF_8);
}
/**
 * Extracts the text of a legacy .doc Word file and returns it as UTF-8 bytes.
 *
 * <p>Any exception raised while opening or parsing the file is printed via
 * {@code printStackTrace()} and not propagated; in that case the returned
 * array is empty. The extracted text is also printed to standard output.
 *
 * @param filePath path of the .doc file
 * @return the extracted text encoded as UTF-8, or an empty array on failure
 */
byte[] getdocContent(String filePath) {
    List<String> docList = new ArrayList<String>();
    // try-with-resources closes the file stream on both success and failure
    try (InputStream input = new FileInputStream(new File(filePath))) {
        WordExtractor wex = new WordExtractor(input);
        docList.add(wex.getText());
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println(docList);
    // Explicit charset so the bytes do not depend on the platform default
    return String.join(",", docList).getBytes(StandardCharsets.UTF_8);
}
/**
 * Reads every cell of the first sheet of an .xlsx workbook and returns the
 * cell texts, joined with commas, as UTF-8 bytes.
 *
 * <p>Cells are visited row by row, left to right; missing rows and missing
 * cells are skipped. Each cell is rendered with {@code Cell.toString()}.
 * Any exception is printed via {@code printStackTrace()} and not propagated;
 * the cells collected up to that point are still returned.
 *
 * @param filePath path of the .xlsx file
 * @return comma-joined cell texts encoded as UTF-8 (empty if nothing was read)
 */
byte[] getxlsxContent(String filePath) {
    List<String> list = new ArrayList<>();
    // Both the stream and the workbook are closed when the try block exits
    try (FileInputStream fis = new FileInputStream(new File(filePath));
         XSSFWorkbook hb = new XSSFWorkbook(fis)) {
        System.out.println(hb.getNumCellStyles());
        // Only the first sheet is read
        Sheet sheet = hb.getSheetAt(0);
        int firstrow = sheet.getFirstRowNum();
        int lastrow = sheet.getLastRowNum();
        // getLastRowNum() is the index of the last row itself, so the bound
        // is inclusive; "<" would silently drop the final row.
        for (int i = firstrow; i <= lastrow; i++) {
            Row row = sheet.getRow(i);
            if (row == null) {
                continue;
            }
            int firstcell = row.getFirstCellNum();
            // getLastCellNum() is already "last index + 1", so "<" is correct
            int lastcell = row.getLastCellNum();
            for (int j = firstcell; j < lastcell; j++) {
                Cell cell = row.getCell(j);
                if (cell != null) {
                    System.out.println(cell.toString());
                    list.add(cell.toString());
                }
            }
            System.out.println();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println(list);
    // Explicit charset so the bytes do not depend on the platform default
    return String.join(",", list).getBytes(StandardCharsets.UTF_8);
}
/**
 * Reads every cell of the first sheet of a legacy .xls workbook and returns
 * the cell texts, joined with commas, as UTF-8 bytes.
 *
 * <p>Cells are visited row by row, left to right; missing rows and missing
 * cells are skipped. Each cell is rendered with its {@code toString()}.
 * Any exception is printed via {@code printStackTrace()} and not propagated;
 * the cells collected up to that point are still returned.
 *
 * @param filePath path of the .xls file
 * @return comma-joined cell texts encoded as UTF-8 (empty if nothing was read)
 */
byte[] getxlsContent(String filePath) {
    List<String> list = new ArrayList<>();
    // Both the file system and the workbook are closed when the try block
    // exits, so the handle on the file is released
    try (POIFSFileSystem pSystem = new POIFSFileSystem(new File(filePath));
         HSSFWorkbook hb = new HSSFWorkbook(pSystem)) {
        System.out.println(hb.getNumCellStyles());
        // Only the first sheet is read
        HSSFSheet sheet = hb.getSheetAt(0);
        int firstrow = sheet.getFirstRowNum();
        int lastrow = sheet.getLastRowNum();
        // getLastRowNum() is the index of the last row itself, so the bound
        // is inclusive; "<" would silently drop the final row.
        for (int i = firstrow; i <= lastrow; i++) {
            HSSFRow row = sheet.getRow(i);
            if (row == null) {
                continue;
            }
            int firstcell = row.getFirstCellNum();
            // getLastCellNum() is already "last index + 1", so "<" is correct
            int lastcell = row.getLastCellNum();
            for (int j = firstcell; j < lastcell; j++) {
                HSSFCell cell = row.getCell(j);
                if (cell != null) {
                    list.add(cell.toString());
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println(list);
    // Explicit charset so the bytes do not depend on the platform default
    return String.join(",", list).getBytes(StandardCharsets.UTF_8);
}
/**
 * Extracts the text of all pages of a PDF file and returns it as UTF-8 bytes.
 *
 * <p>Text is extracted from page 1 through the last page without
 * position-based sorting, and is also printed to standard output. Any
 * exception is printed via {@code printStackTrace()} and not propagated; in
 * that case the returned array is empty rather than the bytes of the literal
 * text "null".
 *
 * @param filePath path of the PDF file
 * @return the extracted text encoded as UTF-8, or an empty array on failure
 */
byte[] getpdfContent(String filePath) {
    // Start from "" so a failed extraction returns empty bytes
    String content = "";
    // try-with-resources closes the file stream on both success and failure
    try (InputStream input = new FileInputStream(new File(filePath))) {
        // Load the PDF document
        PDFParser parser = new PDFParser(input);
        parser.parse();
        PDDocument document = parser.getPDDocument();
        try {
            PDFTextStripper pts = new PDFTextStripper();
            pts.setSortByPosition(false);
            pts.setStartPage(1);
            pts.setEndPage(document.getNumberOfPages());
            content = pts.getText(document);
            System.out.println(content);
        } finally {
            // Release the document's resources even if text extraction fails
            document.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    // Explicit charset so the bytes do not depend on the platform default
    return content.getBytes(StandardCharsets.UTF_8);
}