Word
Apache POI
-Apache出品,必属精品,poi.apache.org
-可处理docx,xlsx,pptx,visio等office套件
-纯Java工具包,无需第三方依赖
-主要类
·XWPFDocument 整个文档对象
·XWPFParagraph 段落
·XWPFRun 一个片段(字体样式相同的一段)
·XWPFPicture 图片
·XWPFTable 表格
package org.example;
import org.apache.poi.xwpf.usermodel.*;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.List;
/**
 * Dumps the body structure of {@code test.docx} to standard output:
 * tables, paragraphs, and for each paragraph its runs (text, embedded
 * pictures, or the raw run XML when the run carries neither).
 */
public class TextRead {

    public static void main(String[] args) throws Exception {
        readDocx();
    }

    /**
     * Opens {@code test.docx} from the working directory and prints one or
     * more lines per body element.
     *
     * @throws Exception if the file cannot be opened or parsed
     */
    public static void readDocx() throws Exception {
        // try-with-resources closes both the document and the stream even
        // when parsing or printing fails part-way through.
        try (InputStream is = new FileInputStream("test.docx");
             XWPFDocument xwpf = new XWPFDocument(is)) {
            for (IBodyElement element : xwpf.getBodyElements()) {
                if (element.getElementType() == BodyElementType.TABLE) {
                    // Table
                    System.out.println("table" + element.getPart());
                } else if (element instanceof XWPFParagraph) {
                    printParagraph((XWPFParagraph) element);
                } else {
                    // Body elements that are neither tables nor paragraphs
                    // cannot be cast to XWPFParagraph; report them instead
                    // of failing with a ClassCastException.
                    System.out.println("unsupported body element: " + element.getElementType());
                }
            }
        }
    }

    /** Prints the first-line indent of a paragraph followed by each of its runs. */
    private static void printParagraph(XWPFParagraph para) {
        System.out.println("It is a new paragraph....The indention is " + para.getFirstLineIndent());
        List<XWPFRun> runs = para.getRuns();
        System.out.println("run");
        if (runs.isEmpty()) {
            System.out.println("empty line");
        }
        for (XWPFRun run : runs) {
            printRun(run);
        }
    }

    /** Prints a run's text if it has any, otherwise its pictures or its raw XML. */
    private static void printRun(XWPFRun run) {
        String text = run.text();
        if (text != null && !text.isEmpty()) {
            System.out.println("====" + run.getCharacterSpacing() + text);
            return;
        }
        if (!run.getEmbeddedPictures().isEmpty()) {
            System.out.println("image***" + run.getEmbeddedPictures());
            return;
        }
        System.out.println("objects:" + run.getCTR().getObjectList());
        String xml = run.getCTR().xmlText();
        // The run XML is printed either way; runs whose XML mentions
        // "instrText" get an extra marker line first.
        if (xml.contains("instrText")) {
            System.out.println("this is an equation field.");
        }
        System.out.println(xml);
    }
}
Excel
package org.example;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.*;
import java.util.Iterator;
/**
 * Writes a small 5x5 workbook to {@code c:/temp/test.xlsx} and reads it
 * back, printing every cell of the first sheet.
 */
public class ReadWriteExcelFile {

    /**
     * Prints each row of the first sheet of {@code c:/temp/test.xlsx} on its
     * own line, cells separated by a single space.
     *
     * @throws IOException if the file cannot be opened or parsed
     */
    public static void readXLSXFile() throws IOException {
        // Both the stream and the workbook are closed on every exit path.
        try (InputStream excelFileToRead = new FileInputStream("c:/temp/test.xlsx");
             XSSFWorkbook wb = new XSSFWorkbook(excelFileToRead)) {
            XSSFSheet sheet = wb.getSheetAt(0);
            Iterator<?> rows = sheet.rowIterator();
            while (rows.hasNext()) {
                XSSFRow row = (XSSFRow) rows.next();
                Iterator<?> cells = row.cellIterator();
                while (cells.hasNext()) {
                    XSSFCell cell = (XSSFCell) cells.next();
                    // toString() renders the cell whatever its type, whereas
                    // getStringCellValue() throws on numeric cells.
                    System.out.print(cell.toString() + " ");
                }
                System.out.println();
            }
        }
    }

    /**
     * Creates {@code c:/temp/test.xlsx} with one sheet named {@code sheet1}
     * holding 5 rows by 5 columns of text cells ("Cell r c").
     *
     * @throws IOException if the file cannot be written
     */
    public static void writeXLSXFile() throws IOException {
        String excelFileName = "c:/temp/test.xlsx"; // name of excel file
        String sheetName = "sheet1"; // name of sheet
        try (XSSFWorkbook wb = new XSSFWorkbook()) {
            XSSFSheet sheet = wb.createSheet(sheetName);
            // iterating r number of rows
            for (int r = 0; r < 5; r++) {
                XSSFRow row = sheet.createRow(r);
                // iterating c number of columns
                for (int c = 0; c < 5; c++) {
                    XSSFCell cell = row.createCell(c);
                    cell.setCellValue("Cell " + r + " " + c);
                }
            }
            // The output stream is closed (and thereby flushed) even if
            // write() throws.
            try (FileOutputStream fileOut = new FileOutputStream(excelFileName)) {
                wb.write(fileOut);
            }
        }
    }

    public static void main(String[] args) throws IOException {
        writeXLSXFile();
        readXLSXFile();
    }
}
CSV
全称:Comma-Separated Values文件(逗号分隔)
·广义CSV文件,可以由空格/Tab键/分号等完成字段分隔
第三方包:Apache Commons CSV
-CSVFormat 文档格式
-CSVParser 解析文档
-CSVRecord 一行记录
-CSVPrinter 写入文档
package org.example;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
import org.apache.poi.xwpf.usermodel.*;
import java.io.*;
import java.time.LocalDate;
import java.util.List;
/**
 * Small Apache Commons CSV demo: reads {@code c:/temp/test.csv} by column
 * index and by column name, then writes {@code person.csv}.
 */
public class CSVTest {

    public static void main(String[] args) throws Exception {
        ReadCSVWithIndex();
        readCSVWithName();
        writeCSV();
    }

    /**
     * Prints the first column of every record in {@code c:/temp/test.csv},
     * parsed with the EXCEL format.
     *
     * @throws Exception if the file cannot be opened or parsed
     */
    public static void ReadCSVWithIndex() throws Exception {
        // The reader is closed on every exit path; previously it leaked.
        try (Reader in = new FileReader("c:/temp/test.csv")) {
            Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in);
            for (CSVRecord record : records) {
                System.out.println(record.get(0)); // index 0 is the first column
            }
        }
    }

    /**
     * Prints the {@code score} column of every record in
     * {@code c:/temp/test.csv}, parsed with the RFC4180 format and the
     * header names {@code name}, {@code age}, {@code score} supplied here.
     *
     * @throws Exception if the file cannot be opened or parsed
     */
    public static void readCSVWithName() throws Exception {
        try (Reader in = new FileReader("c:/temp/test.csv")) {
            Iterable<CSVRecord> records =
                    CSVFormat.RFC4180.withHeader("name", "age", "score").parse(in);
            for (CSVRecord record : records) {
                System.out.println(record.get("score")); // look the column up by header name
            }
        }
    }

    /**
     * Writes a header line, two person records and one blank line between
     * them to {@code person.csv} using the EXCEL format. An
     * {@link IOException} is reported via its stack trace and not rethrown.
     *
     * @throws Exception declared for callers; see above for IOException handling
     */
    public static void writeCSV() throws Exception {
        try (CSVPrinter printer = new CSVPrinter(new FileWriter("person.csv"), CSVFormat.EXCEL)) {
            printer.printRecord("id", "userName", "firstName", "lastName", "birthday");
            printer.printRecord(1, "john3", "John", "Doe", LocalDate.of(1973, 9, 15));
            printer.println(); // blank line
            printer.printRecord(2, "mary", "Mary", "Meyer", LocalDate.of(1985, 3, 29));
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}
Apache PDFBox
纯Java类库
主要功能:创建,提取文本,分割/合并/删除
-主要类
·PDDocument pdf文档对象
·PDFTextStripper pdf文本对象
·PDFMergerUtility 合并工具
读
package org.example;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.text.PDFTextStripper;
import java.io.File;
import java.io.IOException;
/**
 * Extracts and prints the text of every page of {@code sample.pdf}.
 */
public class PdfRead {

    /**
     * Loads {@code sample.pdf} from the working directory, checks that
     * content extraction is permitted, and prints the text of all pages.
     * Any failure is reported via its stack trace.
     */
    public static void main(String[] args) throws Exception {
        File pdfFile = new File("sample.pdf");
        // try-with-resources closes the document on every exit path;
        // previously it was never closed.
        try (PDDocument document = PDDocument.load(pdfFile)) {
            AccessPermission ap = document.getCurrentAccessPermission();
            if (!ap.canExtractContent()) {
                throw new IOException("没有权限");
            }
            int pages = document.getNumberOfPages();
            // Read the text content
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setSortByPosition(true); // emit text in positional order
            stripper.setStartPage(1); // first page
            stripper.setEndPage(pages); // last page
            String content = stripper.getText(document);
            System.out.println(content);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }
}
写
package org.example;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDMMType1Font;
/**
 * Creates a one-page PDF, {@code test.pdf}, containing the text
 * "hello world".
 */
public class PdfWriter {

    public static void main(String[] args) throws Exception {
        createHelloPDF();
    }

    /**
     * Builds a document with a single page, draws "hello world" in
     * 12pt Helvetica Bold at offset (100, 700), and saves it as
     * {@code test.pdf}. Any failure is reported via its stack trace.
     */
    public static void createHelloPDF() {
        // The document is closed on every exit path, including failures
        // while drawing or saving.
        try (PDDocument doc = new PDDocument()) {
            PDPage page = new PDPage();
            doc.addPage(page);
            PDFont font = PDMMType1Font.HELVETICA_BOLD;
            // The content stream must be closed before the document is saved;
            // the inner try-with-resources guarantees that ordering.
            try (PDPageContentStream content = new PDPageContentStream(doc, page)) {
                content.beginText();
                content.setFont(font, 12);
                // newLineAtOffset replaces the deprecated moveTextPositionByAmount.
                content.newLineAtOffset(100, 700);
                content.showText("hello world");
                content.endText();
            }
            doc.save("test.pdf");
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }
}