import fr.opensagres.xdocreport.core.io.IOUtils;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
/**
 * Helpers for converting Word documents to HTML with Apache POI and for
 * post-processing the generated HTML (inlining class styles, rewriting image
 * sources, wrapping HTML in an OLE2 container).
 */
public class POIUtil {

    /** Matches a double-quoted {@code class} attribute inside a tag; group 1 is the attribute value. */
    private static final Pattern CLASS_ATTRIBUTE = Pattern.compile("\\sclass\\s*=\\s*\"([^\"]*)\"");

    /** Matches the start of a double-quoted {@code style} attribute, up to and including the opening quote. */
    private static final Pattern STYLE_ATTRIBUTE = Pattern.compile("\\sstyle\\s*=\\s*\"");

    public POIUtil() {}

    /**
     * Converts a Word 97-2003 ({@code .doc}) file to an HTML file.
     *
     * @param sourceFilePath path of the {@code .doc} file to read
     * @param targetFilePath path of the HTML file to write
     * @return {@code targetFilePath}
     * @throws Exception if the source cannot be read or the conversion fails
     */
    public static String docToHtml(String sourceFilePath, String targetFilePath) throws Exception {
        // The source stream is opened here, so it is closed here as well.
        try (InputStream source = new FileInputStream(sourceFilePath)) {
            writeDocAsHtml(source, new StreamResult(new File(targetFilePath)));
        }
        return targetFilePath;
    }

    /**
     * Converts a Word 2007+ ({@code .docx}) file to an XHTML file encoded as UTF-8.
     *
     * @param sourceFilePath path of the {@code .docx} file to read
     * @param targetFilePath path of the HTML file to write
     * @return {@code targetFilePath}
     * @throws Exception if the source cannot be read or the conversion fails
     */
    public static String docxToHtml(String sourceFilePath, String targetFilePath) throws Exception {
        try (InputStream source = new FileInputStream(sourceFilePath)) {
            XWPFDocument document = new XWPFDocument(source);
            // The target is opened only after the source parsed successfully, so a
            // broken input does not leave an empty output file behind.
            try (Writer target = new OutputStreamWriter(
                    new FileOutputStream(targetFilePath), StandardCharsets.UTF_8)) {
                XHTMLConverter converter = (XHTMLConverter) XHTMLConverter.getInstance();
                converter.convert(document, target, XHTMLOptions.create());
            }
        }
        return targetFilePath;
    }

    /**
     * Converts a Word 97-2003 ({@code .doc}) stream to an HTML string.
     *
     * @param fileInputStream stream positioned at the start of a {@code .doc} document;
     *                        it is not closed by this method
     * @return the generated HTML
     * @throws Exception if the stream cannot be read or the conversion fails
     */
    public static String docToHtml(InputStream fileInputStream) throws Exception {
        ByteArrayOutputStream html = new ByteArrayOutputStream();
        writeDocAsHtml(fileInputStream, new StreamResult(html));
        // The serializer is configured for UTF-8 output, so decode with the same charset.
        return html.toString("UTF-8");
    }

    /**
     * Converts a Word 2007+ ({@code .docx}) stream to an XHTML string.
     *
     * @param fileInputStream stream positioned at the start of a {@code .docx} document;
     *                        it is not closed by this method
     * @return the generated XHTML
     * @throws Exception if the stream cannot be read or the conversion fails
     */
    public static String docxToHtml(InputStream fileInputStream) throws Exception {
        XHTMLConverter converter = (XHTMLConverter) XHTMLConverter.getInstance();
        // Converting straight into a Writer avoids any bytes-to-text step, so the
        // result does not depend on the platform default charset.
        StringWriter html = new StringWriter();
        converter.convert(new XWPFDocument(fileInputStream), html, XHTMLOptions.create());
        return html.toString();
    }

    /**
     * Reads a UTF-8 encoded HTML file and returns its body with class styles inlined.
     *
     * @param filePath path of the HTML file to read
     * @return the result of {@link #changeStyle(StringBuffer)} applied to the file content
     * @throws UncheckedIOException if the file cannot be opened or read
     */
    public static String readFile(String filePath) {
        ByteArrayOutputStream content = new ByteArrayOutputStream();
        try (InputStream input = new FileInputStream(filePath)) {
            byte[] chunk = new byte[8192];
            for (int n; (n = input.read(chunk)) != -1;) {
                content.write(chunk, 0, n);
            }
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to read " + filePath, e);
        }
        // Decode once over the complete content: decoding chunk by chunk would corrupt
        // a multi-byte character that happens to straddle a chunk boundary.
        String html = new String(content.toByteArray(), StandardCharsets.UTF_8);
        return changeStyle(new StringBuffer(html));
    }

    /**
     * Extracts the {@code <body>} of an HTML document as a {@code <div>} and turns the
     * class rules of its first {@code <style>} block into inline {@code style} attributes.
     *
     * <p>Only simple class selectors ({@code .name{...}}) are inlined. For every opening
     * tag whose {@code class} attribute names such a class, the declarations are put in
     * front of an existing {@code style} value, or a new {@code style} attribute is added
     * right after the {@code class} attribute. Class names are compared as whole tokens,
     * so {@code p1} never matches {@code p11}.
     *
     * @param buff the complete HTML document
     * @return the body markup, wrapped in {@code <div>} instead of {@code <body>}
     * @throws IllegalArgumentException if the document has no {@code <body>...</body>} element
     */
    public static String changeStyle(StringBuffer buff) {
        String html = buff.toString();
        String body = extractBodyAsDiv(html);
        Map<String, String> classStyles = parseClassRules(extractStyleSheet(html));
        return classStyles.isEmpty() ? body : inlineStyles(body, classStyles);
    }

    /**
     * Replaces the {@code src} value of every {@code <img src="...">} tag in place.
     *
     * <p>The replacement value is still a placeholder (empty string); only the text
     * between the quotes of the {@code src} attribute is touched.
     *
     * @param buffer HTML markup to modify in place
     */
    public void changePicture(StringBuffer buffer) {
        final String marker = "<img src=\"";
        // TODO(review): look up the real location on the external file server here.
        String realSrc = "";
        int from = 0;
        while (true) {
            int tag = buffer.indexOf(marker, from);
            if (tag < 0) {
                break;
            }
            int valueStart = tag + marker.length();
            int valueEnd = buffer.indexOf("\"", valueStart);
            if (valueEnd < 0) {
                break; // unterminated attribute: nothing sensible to replace
            }
            buffer.replace(valueStart, valueEnd, realSrc);
            from = valueStart + realSrc.length();
        }
    }

    /**
     * Wraps HTML content in an OLE2 (POIFS) container as an entry named
     * {@code WordDocument} and returns the serialized container.
     *
     * @param content the HTML to embed; encoded as UTF-8
     * @return a stream over the bytes of the written container
     * @throws Exception if the container cannot be built or written
     */
    public static ByteArrayInputStream html2World(String content) throws Exception {
        // The encoding must be explicit, otherwise non-ASCII text is garbled on export.
        byte[] htmlBytes = content.getBytes(StandardCharsets.UTF_8);
        POIFSFileSystem poifs = new POIFSFileSystem();
        poifs.getRoot().createDocument("WordDocument", new ByteArrayInputStream(htmlBytes));
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        poifs.writeFilesystem(out);
        return new ByteArrayInputStream(out.toByteArray());
    }

    /** Runs the .doc-to-HTML conversion shared by both {@code docToHtml} overloads; does not close {@code source}. */
    private static void writeDocAsHtml(InputStream source, StreamResult target) throws Exception {
        Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
        WordToHtmlConverter converter = new WordToHtmlConverter(document);
        converter.processDocument(new HWPFDocument(source));
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(converter.getDocument()), target);
    }

    /** Returns the text of the first {@code <style>} element, or an empty string when there is none. */
    private static String extractStyleSheet(String html) {
        int open = html.indexOf("<style");
        if (open < 0) {
            return "";
        }
        int tagEnd = html.indexOf('>', open);
        if (tagEnd < 0) {
            return "";
        }
        int close = html.indexOf("</style>", tagEnd + 1);
        return close < 0 ? "" : html.substring(tagEnd + 1, close);
    }

    /** Returns the {@code <body>} element of {@code html} with its tag renamed to {@code div}. */
    private static String extractBodyAsDiv(String html) {
        int open = html.indexOf("<body");
        int close = open < 0 ? -1 : html.indexOf("</body", open);
        if (close < 0) {
            throw new IllegalArgumentException("HTML contains no <body>...</body> element");
        }
        // Only the two tags are renamed; the word "body" inside the content is left alone.
        return "<div" + html.substring(open + "<body".length(), close) + "</div>";
    }

    /** Parses {@code .name{declarations}} rules into a map from class name to declarations. */
    private static Map<String, String> parseClassRules(String css) {
        Map<String, String> rules = new HashMap<>();
        for (String rule : css.split("}")) {
            int brace = rule.indexOf('{');
            if (brace < 0) {
                continue;
            }
            // Rules may be separated by line breaks, so the selector has to be trimmed.
            String selector = rule.substring(0, brace).trim();
            // The declarations end up inside a double-quoted attribute value.
            String declarations = rule.substring(brace + 1).trim().replace("\"", "&quot;");
            if (!selector.startsWith(".") || selector.length() == 1 || declarations.isEmpty()) {
                continue;
            }
            if (!declarations.endsWith(";")) {
                declarations += ";";
            }
            String className = selector.substring(1);
            String previous = rules.get(className);
            rules.put(className, previous == null ? declarations : previous + declarations);
        }
        return rules;
    }

    /** Copies {@code body}, adding inline styles to every opening tag that carries a known class. */
    private static String inlineStyles(String body, Map<String, String> classStyles) {
        StringBuilder out = new StringBuilder(body.length() + 64);
        int pos = 0;
        while (pos < body.length()) {
            int tagStart = body.indexOf('<', pos);
            if (tagStart < 0) {
                break;
            }
            out.append(body, pos, tagStart);
            boolean openingTag = tagStart + 1 < body.length()
                    && Character.isLetter(body.charAt(tagStart + 1));
            int tagEnd = openingTag ? findTagEnd(body, tagStart) : -1;
            if (tagEnd < 0) {
                // Closing tag, comment or stray '<': copy it through untouched.
                out.append('<');
                pos = tagStart + 1;
                continue;
            }
            out.append(inlineTag(body.substring(tagStart, tagEnd + 1), classStyles));
            pos = tagEnd + 1;
        }
        return out.append(body, pos, body.length()).toString();
    }

    /** Returns the index of the {@code >} closing the tag at {@code tagStart}, ignoring quoted text, or -1. */
    private static int findTagEnd(String html, int tagStart) {
        char quote = 0;
        for (int i = tagStart + 1; i < html.length(); i++) {
            char c = html.charAt(i);
            if (quote != 0) {
                if (c == quote) {
                    quote = 0;
                }
            } else if (c == '"' || c == '\'') {
                quote = c;
            } else if (c == '>') {
                return i;
            }
        }
        return -1;
    }

    /** Returns {@code tag} with the declarations of its known classes added to its {@code style} attribute. */
    private static String inlineTag(String tag, Map<String, String> classStyles) {
        Matcher classAttribute = CLASS_ATTRIBUTE.matcher(tag);
        if (!classAttribute.find()) {
            return tag;
        }
        StringBuilder css = new StringBuilder();
        for (String className : classAttribute.group(1).trim().split("\\s+")) {
            String declarations = classStyles.get(className);
            if (declarations != null) {
                css.append(declarations);
            }
        }
        if (css.length() == 0) {
            return tag;
        }
        Matcher styleAttribute = STYLE_ATTRIBUTE.matcher(tag);
        int insertAt;
        String insertion;
        if (styleAttribute.find()) {
            // Class styles go first so that existing inline declarations still win.
            insertAt = styleAttribute.end();
            insertion = css.toString();
        } else {
            insertAt = classAttribute.end();
            insertion = " style=\"" + css + "\"";
        }
        return tag.substring(0, insertAt) + insertion + tag.substring(insertAt);
    }
}
// POI utility class (Word to HTML)
// Latest recommended article published on 2024-05-13 17:47:15