doc合并

最新推荐文章于 2023-06-26 15:50:52 发布
itfallrain
最新推荐文章于 2023-06-26 15:50:52 发布
阅读量473
点赞数
分类专栏： Spring Boot
本文为博主原创文章，未经博主同意不得转载
本文链接：https://blog.csdn.net/qq_38215042/article/details/119250480
版权
Spring Boot 专栏收录该内容
16 篇文章 2 订阅
订阅专栏
package com.landray.kmss.km.doc.util;

import java.io.BufferedWriter;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.OutputStream;

import java.io.OutputStreamWriter;

import java.util.ArrayList;

import java.util.List;

import org.apache.poi.hwpf.HWPFDocument;

import org.apache.poi.hwpf.model.PicturesTable;

import org.apache.poi.hwpf.usermodel.CharacterRun;

import org.apache.poi.hwpf.usermodel.Paragraph;

import org.apache.poi.hwpf.usermodel.Picture;

import org.apache.poi.hwpf.usermodel.Range;

import org.apache.poi.hwpf.usermodel.Table;

import org.apache.poi.hwpf.usermodel.TableCell;

import org.apache.poi.hwpf.usermodel.TableIterator;

import org.apache.poi.hwpf.usermodel.TableRow;

import org.apache.poi.xwpf.usermodel.XWPFDocument;

public class WordExcelToHtml {
/**
 * ASCII code of the carriage-return character.
 */

private static final short ENTER_ASCII = 13;

/**
 * ASCII code of the space character.
 */

private static final short SPACE_ASCII = 32;

/**
 * ASCII code of the horizontal-tab character.
 */

private static final short TABULATION_ASCII = 9;

// public static String htmlText = "";

// Not read or written by any method visible in this listing.
public static String mainText = "";

// HTML of the table readTable is currently building; reset for every table.
public static String htmlTextTbl = "";

// Index of the table most recently handled by readTable (readTable restarts it at -1).
public static int counter = 0;

// Start offset of the table most recently handled by readTable.
public static int beginPosi = 0;

// End offset of the table most recently handled by readTable.
public static int endPosi = 0;

// Start offset of each table, indexed by counter; allocated by getWordAndStyle.
public static int beginArray[];

// End offset of each table, indexed by counter; allocated by getWordAndStyle.
public static int endArray[];

// Generated HTML of each table, indexed by counter; allocated by getWordAndStyle.
public static String htmlTextArray[];

// Set to true by readTable once it meets a table; nothing in this listing resets it.
// NOTE(review): all of the above is mutable static state shared across calls — confirm
// that documents are only ever converted one at a time.
public static boolean tblExist = false;

/**
 * Merges three hard-coded Word files into one document.
 *
 * Each file is converted with getWordAndStyle, the results are concatenated
 * into a single HTML string, that string is written to D://1.html with
 * writeFile, and the HTML file is then passed to HtmlToDoc.writeWordFile to
 * produce D://file10.doc. Any exception is printed and not rethrown.
 *
 * @param argv command-line arguments (not used)
 */
public static void main(String argv[]) {
try {
// NOTE(review): the string literals in this method contain nothing but line
// breaks, which is not valid Java. Their original content (presumably the HTML
// head, the per-document page break and the closing tags) appears to have been
// stripped when this listing was extracted — TODO restore from the original.
String htmlText = "

"
+ "

"; // Take the body of each Word file, merge them, then add the HTML head and tail; mind the encoding
// NOTE(review): list is a raw List, so list.get(i) is an Object while
// getWordAndStyle expects a String; the type argument was presumably lost
// together with the markup — confirm.
List list = new ArrayList();

String file1 = "D://file8";

String file2 = "D://file9";

String file3 = "D://file11";

list.add(file1);

list.add(file2);

list.add(file3);

// String mainText1 = "";

for (int i = 0; i < list.size(); i++) {
htmlText += getWordAndStyle(list.get(i))

+ "
";

// After each document has been read, append a page break and keep accumulating

}

htmlText += "";

String filePath = "D://1.html";

writeFile(htmlText, filePath);

new HtmlToDoc().writeWordFile(filePath, "D://file10.doc");

} catch (Exception e) {
e.printStackTrace();

}

}

/**
 * Converts one Word file into an HTML fragment, keeping per-character styling.
 *
 * The first four bytes of the file are hex-encoded and compared with
 * "d0cf11e0". On a match the file is opened as an HWPFDocument: readTable
 * pre-renders the tables, then the text is walked one character at a time.
 * At a recorded table start the cached table HTML is appended and the index
 * jumps to the table end; a character run that holds a picture is passed to
 * readPicture; any other character is appended together with markup built
 * from its font name, size, colour, bold and italic flags.
 * Otherwise the file is converted through Word2007ToHtml into D://3.html,
 * that file is read back, and everything up to and including the first '>'
 * is replaced by a literal prefix.
 *
 * NOTE(review): several string literals in this method contain only line
 * breaks or start mid-attribute; their markup appears to have been stripped
 * when the listing was extracted — TODO restore from the original source.
 * NOTE(review): in1 is never closed, and in the non-doc branch it is opened
 * without being used.
 * NOTE(review): the return value of in.read is ignored, so a file shorter
 * than four bytes is not detected.
 * NOTE(review): the table arrays are allocated with length 1 here, while
 * readTable stores one entry per table — confirm behaviour for documents with
 * more than one table.
 *
 * @param fileName path of the Word file to convert
 * @return the generated HTML fragment
 * @throws Exception if the file cannot be read or converted
 */

public static String getWordAndStyle(String fileName) throws Exception {
String htmlText = "";

FileInputStream in = new FileInputStream(new File(fileName));

// Decide between doc and docx from the leading bytes of the file

byte[] b = new byte[4];

in.read(b, 0, b.length);

in.close();

FileInputStream in1 = new FileInputStream(new File(fileName));

System.out.println(bytesToHexString(b) + ";;;");

if (bytesToHexString(b).equalsIgnoreCase("d0cf11e0")) {//"d0cf11e0" identifies a doc file

HWPFDocument doc = new HWPFDocument(in1);

Range rangetbl = doc.getRange();// the range covering the document

TableIterator it = new TableIterator(rangetbl);

int num =1;

beginArray = new int[num];

endArray = new int[num];

htmlTextArray = new String[num];

readTable(it, rangetbl);

// Total number of characters in the document

int length = doc.characterLength();

// Picture container of the document

PicturesTable pTable = doc.getPicturesTable();

int cur = 0;

String tempString = "";

for (int i = 0; i < length - 1; i++) {
// Walk the document one character at a time; range is the one-character span at i

Range range = new Range(i, i + 1, doc);

CharacterRun cr = range.getCharacterRun(0);

if (tblExist && cur < beginArray.length) {
if (i == beginArray[cur]) {
htmlText += tempString + htmlTextArray[cur];

tempString = "";

i = endArray[cur] - 1;

cur++;

continue;

}

}

if (pTable.hasPicture(cr)) {
//htmlText += tempString;

// Read and write the picture

tempString = readPicture(pTable, cr);

//tempString = "";

htmlText += tempString;

} else {
Range range2 = new Range(i + 1, i + 2, doc);

// The following character

CharacterRun cr2 = range2.getCharacterRun(0);

char c = cr.text().charAt(0);

// Is it a carriage return?

if (c == ENTER_ASCII) {
tempString += "
";

}

// Is it a space?

else if (c == SPACE_ASCII)

tempString += " ";

// Is it a horizontal tab?

else if (c == TABULATION_ASCII)

tempString += "    ";

// Compare whether the two adjacent characters share the same format.
// NOTE(review): flag is computed but never read afterwards.
// NOTE(review): the fontStyle literal below has lost its opening text
// (presumably the start of a styled tag) — TODO restore.

boolean flag = compareCharStyle(cr, cr2);

String fontStyle = "

+ cr.getFontName()

+ ";font-size:"

+ cr.getFontSize()

/ 2

+ "pt;color:"

+ ColorUtils.getHexColor(cr.getIco24()) + ";";

if (cr.isBold())

fontStyle += "font-weight:bold;";

if (cr.isItalic())

fontStyle += "font-style:italic;";

htmlText += fontStyle + "\" mce_style=\"font-family:"

+ cr.getFontName() + ";font-size:"

+ cr.getFontSize() / 2 + "pt;";

if (cr.isBold())

fontStyle += "font-weight:bold;";

if (cr.isItalic())

fontStyle += "font-style:italic;";

htmlText += fontStyle + "\">" + tempString + cr.text()

+ "

";
tempString = "";

}

}

htmlText += tempString;

return htmlText;

} else {
Word2007ToHtml w = new Word2007ToHtml();

String filepath = "";

String fileName1 = fileName;

String htmlName = "D://3.html";

w.Word2007ToHtml(fileName1, htmlName);

String result = w.readFileByBytes(htmlName);

int i = result.indexOf('>');

String realreasult = "

"+result.substring(i+1);
System.out.println(realreasult);

htmlText += realreasult;

return htmlText;

}

}

/**
 * Renders every table of the document to HTML and records where each one sits.
 *
 * counter restarts at -1 and is incremented per table. For each table the
 * start and end offsets are stored in beginArray / endArray at index counter,
 * the trimmed text of every paragraph of every cell is appended to
 * htmlTextTbl, and the finished markup is stored in htmlTextArray at the same
 * index. tblExist is set to true as soon as one table is found. Offsets and
 * cell contents are also printed to standard output.
 *
 * NOTE(review): the string literals that should carry the table, row and cell
 * markup contain only line breaks; the markup appears to have been stripped
 * when the listing was extracted — TODO restore from the original source.
 * NOTE(review): the empty-cell check compares with == against "", which tests
 * reference identity rather than content; equals or isEmpty is presumably
 * what was meant.
 * NOTE(review): the arrays must be allocated by the caller beforehand, and
 * nothing here checks counter against their length.
 * NOTE(review): rangetbl is not referenced in the body.
 *
 * @param it iterator over the tables of the document
 * @param rangetbl range the iterator was created from
 * @throws Exception if reading a table fails
 */

public static void readTable(TableIterator it, Range rangetbl)

throws Exception {
htmlTextTbl = "";

// Iterate over the tables in the document

counter = -1;

while (it.hasNext()) {
tblExist = true;

htmlTextTbl = "";

Table tb = (Table) it.next();

beginPosi = tb.getStartOffset();

endPosi = tb.getEndOffset();

System.out.println("............" + beginPosi + "...." + endPosi);

counter = counter + 1;

// Iterate over rows, starting at 0

beginArray[counter] = beginPosi;

endArray[counter] = endPosi;

htmlTextTbl += "

for (int i = 0; i < tb.numRows(); i++) {
TableRow tr = tb.getRow(i);

htmlTextTbl += "

";
// Iterate over columns, starting at 0

for (int j = 0; j < tr.numCells(); j++) {
TableCell td = tr.getCell(j);// fetch the cell

int cellWidth = td.getWidth();

// Fetch the content of the cell

for (int k = 0; k < td.numParagraphs(); k++) {
Paragraph para = td.getParagraph(k);

String s = para.text().toString().trim();

if (s == "") {
s = " ";

}

System.out.println(s);

htmlTextTbl += "

" + s + "";
System.out.println(i + ":" + j + ":" + cellWidth + ":"

+ s);

} // end for

} // end for

} // end for

htmlTextTbl += "

";
htmlTextArray[counter] = htmlTextTbl;

} // end while

}

/**
 * Extracts the picture held by a character run, saves it under
 * {@code e://test}, and returns an image tag that points at the saved file.
 *
 * <p>The method used to be declared {@code void} although its caller assigns
 * the result to a {@code String}; returning the tag makes that call valid
 * while callers that ignore the result keep working. The output stream is now
 * closed even when writing fails.
 *
 * @param pTable picture table of the document being converted
 * @param cr character run that holds the picture
 * @return an {@code <img>} tag referencing the written file
 * @throws Exception if the picture cannot be extracted or written
 */
public static String readPicture(PicturesTable pTable, CharacterRun cr)
        throws Exception {
    // Extract the picture without pre-filling its bytes.
    Picture pic = pTable.extractPicture(cr, false);

    // File name suggested by POI for this picture.
    String afileName = pic.suggestFullFileName();

    OutputStream out = new FileOutputStream(new File("e://test"
            + File.separator + afileName));
    try {
        pic.writeImageContent(out);
    } finally {
        out.close();
    }

    // NOTE(review): only the tail of this tag (from the mce_src attribute on)
    // survived in the old commented-out code; the leading <img src=...> part is
    // inferred from it — confirm against the original source.
    return "<img src=\"e://test//" + afileName
            + "\" mce_src=\"e://test//" + afileName + "\"/>";
}

/**
 * Tells whether two character runs are formatted alike.
 *
 * @param cr1 first character run
 * @param cr2 second character run
 * @return true when bold flag, italic flag, font name and font size all match
 */
public static boolean compareCharStyle(CharacterRun cr1, CharacterRun cr2) {
    if (cr1.isBold() != cr2.isBold()) {
        return false;
    }
    if (cr1.isItalic() != cr2.isItalic()) {
        return false;
    }
    if (!cr1.getFontName().equals(cr2.getFontName())) {
        return false;
    }
    return cr1.getFontSize() == cr2.getFontSize();
}

/**
 * Writes a string to a file as UTF-8, replacing any existing content.
 *
 * <p>The charset is fixed to UTF-8 because the readers in this listing decode
 * with utf-8, whereas the platform default charset differs between machines.
 * I/O failures are printed rather than rethrown; that now includes a failure
 * while closing, since closing the writer is what flushes the buffered text.
 *
 * @param s text to write
 * @param filePath path of the file to create or overwrite
 */
public static void writeFile(String s, String filePath) {
    FileOutputStream fos = null;
    BufferedWriter bw = null;
    try {
        fos = new FileOutputStream(new File(filePath));
        bw = new BufferedWriter(new OutputStreamWriter(fos, "UTF-8"));
        bw.write(s);
    } catch (IOException ioe) {
        // Also covers FileNotFoundException and UnsupportedEncodingException.
        ioe.printStackTrace();
    } finally {
        // Close each stream independently so one failure cannot leak the other.
        if (bw != null) {
            try {
                bw.close();
            } catch (IOException ie) {
                ie.printStackTrace();
            }
        }
        if (fos != null) {
            try {
                fos.close();
            } catch (IOException ie) {
                ie.printStackTrace();
            }
        }
    }
}

/**
 * Hex-encodes a byte array as lower-case digits, two per byte.
 * Used to inspect the leading signature bytes of a file.
 *
 * @param src bytes to encode
 * @return the hex string, or null when src is null or empty
 */
public static String bytesToHexString(byte[] src) {
    if (src == null || src.length == 0) {
        return null;
    }
    StringBuilder hex = new StringBuilder(src.length * 2);
    for (byte value : src) {
        int unsigned = value & 0xFF;
        hex.append(Character.forDigit(unsigned >>> 4, 16));
        hex.append(Character.forDigit(unsigned & 0x0F, 16));
    }
    return hex.toString();
}

}

获取字体颜色的工具类：

package com.landray.kmss.km.doc.util;

/**
 * Converts a packed colour int whose lowest byte is red into a
 * {@code #rrggbb} string.
 */
public class ColorUtils {

    /** Returns bits 0-7 of {@code c}, used as the red channel. */
    public static int red(int c) {
        return c & 0xFF;
    }

    /** Returns bits 8-15 of {@code c}, used as the green channel. */
    public static int green(int c) {
        return (c >>> 8) & 0xFF;
    }

    /** Returns bits 16-23 of {@code c}, used as the blue channel. */
    public static int blue(int c) {
        return (c >>> 16) & 0xFF;
    }

    /** Repacks {@code c} so that red is the highest of the three bytes and blue the lowest. */
    public static int rgb(int c) {
        int r = red(c);
        int g = green(c);
        int b = blue(c);
        return (r << 16) | (g << 8) | b;
    }

    /**
     * Left-pads a hex string with zeros to six characters.
     * Strings already six characters or longer are returned unchanged.
     */
    public static String rgbToSix(String rgb) {
        StringBuilder padded = new StringBuilder();
        for (int missing = 6 - rgb.length(); missing > 0; missing--) {
            padded.append('0');
        }
        return padded.append(rgb).toString();
    }

    /**
     * Formats a packed colour as {@code #rrggbb}.
     * The value -1 is treated as 0 (presumably the "no explicit colour"
     * marker of the caller — confirm).
     */
    public static String getHexColor(int color) {
        int source = (color == -1) ? 0 : color;
        String hex = Integer.toHexString(rgb(source));
        return "#" + rgbToSix(hex);
    }

}

将HTML文件转换为doc文件：

package com.landray.kmss.km.doc.util;

import java.io.BufferedReader;

import java.io.ByteArrayInputStream;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.InputStreamReader;

import java.nio.charset.Charset;

import org.apache.poi.poifs.filesystem.DirectoryEntry;

import org.apache.poi.poifs.filesystem.DocumentEntry;

import org.apache.poi.poifs.filesystem.POIFSFileSystem;

//将docx文件转为HTML

package com.landray.kmss.km.doc.util;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.InputStream;

import java.io.InputStreamReader;

import java.io.OutputStream;

import java.io.Reader;

import org.apache.poi.xwpf.usermodel.XWPFDocument;

import org.apache.poi.xwpf.converter.core.FileImageExtractor;

import org.apache.poi.xwpf.converter.core.FileURIResolver;

import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;

import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;

import org.junit.Test;

public  class Word2007ToHtml{
/**
 * Converts a .docx file into an XHTML fragment file; extracted images are
 * stored under {@code D://}.
 *
 * <p>When the source file does not exist a message is printed and nothing is
 * written. The former {@code @Test} annotation is gone: JUnit 4 requires test
 * methods to take no parameters, and this is a regular helper called with two
 * arguments. Both streams are now closed, also when the conversion fails.
 *
 * @param fileName path of the .docx file to convert
 * @param htmlName path of the HTML file to write
 * @throws IOException if the document cannot be read or the HTML cannot be written
 */
public void Word2007ToHtml(String fileName, String htmlName) throws IOException {
    File source = new File(fileName);
    if (!source.exists()) {
        System.out.println("Sorry File does not Exists!");
        return;
    }

    InputStream in = new FileInputStream(source);
    try {
        // Load the Word document.
        XWPFDocument document = new XWPFDocument(in);

        // Configure the conversion; the URI resolver and the extractor both
        // point at the folder that receives the images.
        File imageFolderFile = new File("D://");
        XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imageFolderFile));
        options.setExtractor(new FileImageExtractor(imageFolderFile));
        options.setIgnoreStylesIfUnused(false);
        options.setFragment(true);

        // Convert to XHTML. A ByteArrayOutputStream can be passed instead of
        // a file stream when the markup is wanted in memory.
        OutputStream out = new FileOutputStream(new File(htmlName));
        try {
            XHTMLConverter.getInstance().convert(document, out, options);
        } finally {
            out.close();
        }
    } finally {
        in.close();
    }
}

/**
 * Manual check: converts D://file1.docx to D://3.html and prints the
 * generated markup.
 *
 * @param args command-line arguments (not used)
 * @throws IOException if the conversion fails
 */
public static void main(String[] args) throws IOException {
    final String htmlName = "D://3.html";
    Word2007ToHtml converter = new Word2007ToHtml();
    converter.Word2007ToHtml("D://file1.docx", htmlName);
    System.out.println(readFileByBytes(htmlName));
}

/**
 * Reads a utf-8 text file into a string, dropping every carriage return.
 *
 * <p>Removing {@code \r} keeps Windows line endings from turning into double
 * line breaks later on. Failures are printed rather than rethrown; whatever
 * was read before the failure is returned, which is an empty string when the
 * file cannot be opened. Text is collected in a StringBuilder instead of by
 * repeated string concatenation, and the reader is closed on every path.
 *
 * @param fileName path of the file to read
 * @return the file content without carriage returns
 */
public static String readFileByBytes(String fileName) {
    StringBuilder text = new StringBuilder();
    Reader reader = null;
    try {
        reader = new InputStreamReader(new FileInputStream(new File(fileName)), "utf-8");
        int tempchar;
        while ((tempchar = reader.read()) != -1) {
            if (((char) tempchar) != '\r') {
                text.append((char) tempchar);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return text.toString();
}

}

/**
 * Converts an HTML document into a doc file.
 *
 * @author soildwang
 */

public class HtmlToDoc {
/**
 * Wraps the content of an HTML file in an OLE2 container, stored as an entry
 * named "WordDocument", and writes the container to {@code outfile}.
 *
 * <p>Two defects are fixed here. The result flag was never set, so the method
 * returned false even after a successful write. The method also did nothing
 * unless {@code outfile} already existed; it now only requires the parent
 * directory of {@code outfile} to exist, so a new output file can be created.
 *
 * <p>NOTE(review): the HTML is read as utf-8 but re-encoded with the platform
 * default charset by {@code getBytes()}; left unchanged because the charset
 * the generated markup declares is not visible here — confirm.
 *
 * @param filepath path of the HTML file to read
 * @param outfile path of the doc file to write
 * @return true when the file was written; false when outfile is null or empty,
 *         its parent directory does not exist, or an I/O error was printed
 * @throws Exception if closing a stream fails
 */
public boolean writeWordFile(String filepath, String outfile) throws Exception {
    if (outfile == null || "".equals(outfile)) {
        return false;
    }
    File parentDir = new File(outfile).getAbsoluteFile().getParentFile();
    if (parentDir == null || !parentDir.isDirectory()) {
        return false;
    }

    boolean flag = false;
    ByteArrayInputStream bais = null;
    FileOutputStream fos = null;
    try {
        String content = readFile(filepath);
        byte b[] = content.getBytes();
        bais = new ByteArrayInputStream(b);

        POIFSFileSystem poifs = new POIFSFileSystem();
        DirectoryEntry directory = poifs.getRoot();
        directory.createDocument("WordDocument", bais);

        fos = new FileOutputStream(outfile);
        poifs.writeFilesystem(fos);
        flag = true;
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (fos != null) {
            fos.close();
        }
        if (bais != null) {
            bais.close();
        }
    }
    return flag;
}

/**
 * Reads an HTML file, decoded as utf-8, into a string.
 *
 * <p>The loop runs until {@code read} reports end of stream. The previous
 * {@code ready()} test only says whether a read would block, so it could end
 * the loop before the whole file had been consumed. Read failures are printed
 * rather than rethrown, and whatever was read up to that point is returned.
 *
 * @param filename path of the file to read
 * @return the file content
 * @throws Exception if closing the reader fails
 */
public String readFile(String filename) throws Exception {
    StringBuilder buffer = new StringBuilder();
    BufferedReader br = null;
    try {
        br = new BufferedReader(new InputStreamReader(
                new FileInputStream(new File(filename)), Charset.forName("utf-8")));
        char[] chunk = new char[4096];
        int count;
        while ((count = br.read(chunk)) != -1) {
            buffer.append(chunk, 0, count);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (br != null) {
            br.close();
        }
    }
    return buffer.toString();
}

/**
 * Local smoke test: converts d://1.html into D://file8.doc.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if the conversion fails
 */
public static void main(String[] args) throws Exception {
    HtmlToDoc converter = new HtmlToDoc();
    // Adjust both paths to the local environment.
    converter.writeWordFile("d://1.html", "D://file8.doc");
}

}
itfallrain
关注
0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
doc合并

package com.landray.kmss.km.doc.util;import java.io.BufferedWriter;import java.io.File;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.IOException;import java.io.OutputStream;
复制链接

扫一扫