java 读取word内容_java中读取word文档里的内容

package com.cn.peitest.excel.word;

import java.io.FileInputStream;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.PrintStream;

import java.util.Iterator;

import java.util.List;

import org.apache.poi.xwpf.extractor.XWPFWordExtractor;

import org.apache.poi.xwpf.usermodel.XWPFDocument;

import org.apache.poi.xwpf.usermodel.XWPFParagraph;

import org.apache.poi.xwpf.usermodel.XWPFPictureData;

import org.apache.poi.xwpf.usermodel.XWPFRun;

import org.apache.poi.xwpf.usermodel.XWPFTable;

import org.apache.poi.xwpf.usermodel.XWPFTableCell;

import org.apache.poi.xwpf.usermodel.XWPFTableRow;

/**
 * Reads a Word (.docx) document with Apache POI XWPF: dumps the text of every
 * paragraph run, extracts embedded pictures to disk, and prints the content of
 * every table cell.
 *
 * @author Pei
 */
public class ReadWord {

    /**
     * Demo entry point: opens a hard-coded .docx file, dumps its paragraphs and
     * pictures, then prints its tables.
     *
     * @param args unused
     * @throws IOException declared for callers; not thrown by the current body
     */
    public static void main(String[] args) throws IOException {
        String docx = "C:\\Users\\Pei\\Desktop\\pei (2).docx";

        XWPFDocument document = read_file(docx);
        if (document == null) {
            // read_file returns null for non-.docx paths and on I/O failure;
            // continuing would only produce a NullPointerException in readPar.
            System.err.println("Cannot open document: " + docx);
            return;
        }

        readPar(document, "C:\\Users\\Pei\\Desktop\\左侧", "C:\\Users\\Pei\\Desktop\\左侧", "C:\\Users\\Pei\\Desktop\\左侧");

        readTableContent(document);
    }

    /**
     * Prints the text of every run of every paragraph, then writes each embedded
     * picture (except the one named {@code image1.jpeg}) into {@code uploadPic}.
     *
     * <p>Side effect: {@code System.out} is redirected to
     * {@code C:\Users\Pei\Desktop\test.txt} and is NOT restored afterwards, so
     * everything printed later in the same JVM also goes to that file.
     *
     * @param document     the opened document; must not be null
     * @param docxReadPath path of the document (currently unused)
     * @param uploadPic    directory the extracted pictures are written into
     * @param picFile      location of the saved pictures (currently unused)
     * @return {@code "sucess"} (sic) when everything was processed, {@code "false"}
     *         when any exception occurred
     */
    public static String readPar(XWPFDocument document, String docxReadPath, String uploadPic, String picFile) {
        // The misspelling is part of the established return contract; callers may compare against it.
        String fail = "sucess";

        Iterator<XWPFParagraph> itPara = document.getParagraphsIterator();

        try {
            // Intentionally left open: it becomes System.out for the rest of the run.
            PrintStream ps = new PrintStream("C:\\Users\\Pei\\Desktop\\test.txt");
            System.setOut(ps);

            // Dump all paragraph text. A run is a stretch of characters sharing the same formatting.
            while (itPara.hasNext()) {
                XWPFParagraph paragraph = itPara.next();
                List<XWPFRun> runs = paragraph.getRuns();
                for (XWPFRun run : runs) {
                    // getText takes an index into the run's text array. The original passed
                    // getTextPosition(), which is the vertical baseline offset of the run,
                    // not an index, and failed for raised text.
                    String oneparaString = run.getText(0);
                    System.out.println(oneparaString);
                }
            }

            List<XWPFPictureData> picList = document.getAllPictures();
            for (XWPFPictureData pic : picList) {
                byte[] bytev = pic.getData();
                String imgName = pic.getFileName();
                System.out.println("=====图片生成中========" + imgName);

                if (!"image1.jpeg".equals(imgName)) {
                    // try-with-resources so the file handle is released even if write fails
                    try (FileOutputStream fos = new FileOutputStream(uploadPic + "/" + imgName)) {
                        fos.write(bytev);
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("=====错误信息====" + e.getMessage());
            fail = "false";
        }

        return fail;
    }

    /**
     * Prints the text of every cell of every table in the document, labelled with
     * 1-based table, row and column numbers.
     *
     * @param document the opened document; must not be null
     */
    public static void readTableContent(XWPFDocument document) {
        Iterator<XWPFTable> itTable = document.getTablesIterator();

        int ind = 0;
        while (itTable.hasNext()) {
            ind++;
            XWPFTable table = itTable.next();

            // rows
            int rcount = table.getNumberOfRows();
            for (int i = 0; i < rcount; i++) {
                XWPFTableRow row = table.getRow(i);

                // columns
                List<XWPFTableCell> cells = row.getTableCells();
                int len = cells.size();
                for (int j = 0; j < len; j++) {
                    XWPFTableCell xc = cells.get(j);
                    String sc = xc.getText();
                    System.out.println("第" + ind + "个表格,第" + (i + 1) + "行,第" + (j + 1) + "列:" + sc);
                }
            }
        }
    }

    /**
     * Opens a .docx file as an {@link XWPFDocument}.
     *
     * @param srcPath path of the file; only paths whose last dot-separated segment
     *                is {@code docx} (case-insensitive) are accepted
     * @return the parsed document, or {@code null} when the path is null, is not a
     *         .docx path, or the file cannot be read
     */
    public static XWPFDocument read_file(String srcPath) {
        if (srcPath == null) {
            return null;
        }

        String[] sp = srcPath.split("\\.");
        if ((sp.length > 0) && sp[sp.length - 1].equalsIgnoreCase("docx")) {
            // The input stream is only needed while the document is being parsed,
            // so it is closed as soon as the constructor returns.
            try (FileInputStream fis = new FileInputStream(srcPath)) {
                return new XWPFDocument(fis);
            } catch (IOException e) {
                System.out.println("读取文件出错!");
                e.printStackTrace();
                return null;
            }
        }

        return null;
    }

}

//pom.xml文件

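<project>
  <modelVersion>4.0.0</modelVersion>

  <groupId>cn.com</groupId>
  <artifactId>excelReadAndWrite</artifactId>
  <version>0.0.1-SNAPSHOT</version>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>

    <dependency>
      <groupId>org.apache.directory.studio</groupId>
      <artifactId>org.apache.commons.codec</artifactId>
      <version>1.8</version>
    </dependency>

    <dependency>
      <groupId>net.sourceforge.jexcelapi</groupId>
      <artifactId>jxl</artifactId>
      <version>2.6.12</version>
    </dependency>

    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-ooxml</artifactId>
      <version>3.9</version>
    </dependency>
  </dependencies>
</project>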

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值