Java 导出 Word 为 XML 格式:使用 Java 的 POI 解析 Word 文档并生成 XML 格式文档

如下代码使用 Java 的 POI 解析 Word 文档并生成 XML 格式的文档。此代码可以编译通过,但运行时有问题,读者可以亲自试试,看能否修复其中的 bug:

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.ArrayDeque;
import java.util.Deque;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.StyleDescription;
import org.apache.poi.hwpf.model.StyleSheet;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;

public final class Word2Forrest {

Writer _out;

HWPFDocument _doc;

@SuppressWarnings("unused")

public Word2Forrest(HWPFDocument doc, OutputStream stream) throws IOException {

OutputStreamWriter out = new OutputStreamWriter(stream, Charset.forName("UTF-8"));

_out = out;

_doc = doc;

init();

openDocument();

openBody();

Range r = doc.getRange();

StyleSheet styleSheet = doc.getStyleSheet();

int sectionLevel = 0;

int lenParagraph = r.numParagraphs();

boolean inCode = false;

for (int x = 0; x < lenParagraph; x++) {

Paragraph p = r.getParagraph(x);

String text = p.text();

if (text.trim().length() == 0) {

continue;

}

StyleDescription paragraphStyle = styleSheet.getStyleDescription(p.getStyleIndex());

String styleName = paragraphStyle.getName();

if (styleName.startsWith("Heading")) {

if (inCode) {

closeSource();

inCode = false;

}

int headerLevel = Integer.parseInt(styleName.substring(8));

if (headerLevel > sectionLevel) {

openSection();

} else {

for (int y = 0; y < (sectionLevel - headerLevel) + 1; y++) {

closeSection();

}

openSection();

}

sectionLevel = headerLevel;

openTitle();

System.out.println("++++++" + p.text());

writePlainText(text);

closeTitle();

} else {

int cruns = p.numCharacterRuns();

CharacterRun run = p.getCharacterRun(0);

String fontName = run.getFontName();

if (fontName.startsWith("Courier")) {

if (!inCode) {

openSource();

inCode = true;

}

System.out.println("------" + p.text());

writePlainText(p.text());

} else {

if (inCode) {

inCode = false;

closeSource();

}

openParagraph();

System.out.println("******" + p.text());

writePlainText(p.text());

closeParagraph();

}

}

}

for (int x = 0; x < sectionLevel; x++) {

closeSection();

}

closeBody();

closeDocument();

_out.flush();

}

public void init() throws IOException {

_out.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n");

_out.write(

""-//APACHE//DTD Documentation V1.1//EN\" \"./dtd/document-v11.dtd\">\r\n");

}

public void openDocument() throws IOException {

_out.write("\r\n");

}

public void closeDocument() throws IOException {

_out.write("\r\n");

}

public void openBody() throws IOException {

_out.write("

\r\n");

}

public void closeBody() throws IOException {

_out.write("\r\n");

}

public void openSection() throws IOException {

_out.write("");

}

public void closeSection() throws IOException {

_out.write("");

}

public void openTitle() throws IOException {

_out.write("

");

}

public void closeTitle() throws IOException {

_out.write("

");

}

public void writePlainText(String text) throws IOException {

_out.write(text);

}

public void openParagraph() throws IOException {

_out.write("

");

}

public void closeParagraph() throws IOException {

_out.write("

");

}

public void openSource() throws IOException {

_out.write("

}

public void closeSource() throws IOException {

_out.write("]]>

");

}

public static void main(String[] args) throws IOException {

InputStream is = new FileInputStream("D:/QMDownload/hwpftest.doc");

OutputStream out = new FileOutputStream("D:/QMDownload/test.xml");

try {

new Word2Forrest(new HWPFDocument(is), out);

} finally {

out.close();

is.close();

}

}

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值