word转html

7 篇文章 0 订阅
package com.gohouse.oss.servlet;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

import javax.naming.InitialContext;
import javax.naming.NamingException;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.FileItemFactory;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.fileupload.servlet.ServletFileUpload;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.mime.MultipartEntity;
import org.apache.http.entity.mime.content.FileBody;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;

import com.gohouse.oss.util.ContextUtil;
import com.gohouse.oss.util.HttpClientHelper;
import com.gohouse.util.log.Log;
import com.gohouse.util.log.Logger;
import com.google.gson.Gson;

public class WordToHtml extends HttpServlet {

	private static final long serialVersionUID = 1L;
	private static Log log = Logger.getLogger(WordToHtml.class);

	/** 
	 * 回车符ASCII码 
	 */
	private static final short ENTER_ASCII = 13;

	/** 
	 * 空格符ASCII码 
	 */
	private static final short SPACE_ASCII = 32;

	/** 
	 * 水平制表符ASCII码 
	 */
	private static final short TABULATION_ASCII = 9;

	private String htmlText = "";

	public String htmlTextTbl = "";
	public int counter = 0;
	public int beginPosi = 0;
	public int endPosi = 0;
	public int beginArray[];
	public int endArray[];
	public String htmlTextArray[];
	public boolean tblExist = false;

	public final String inputFile = "C:/Users/miju/Desktop/aa.doc";

	//	public static void main(String argv[]) {
	//		try {
	//			getWordAndStyle(inputFile);
	//		} catch (Exception e) {
	//			// TODO Auto-generated catch block
	//			e.printStackTrace();
	//		}
	//	}

	@SuppressWarnings("rawtypes")
	public void doPost(HttpServletRequest request, HttpServletResponse response)
			throws ServletException, IOException {

		request.setCharacterEncoding("UTF-8");
		response.setContentType("text/html");
		PrintWriter out = response.getWriter();
		String firePath = getServletContext().getRealPath("/files");

		FileItemFactory factory = new DiskFileItemFactory();
		ServletFileUpload upload = new ServletFileUpload(factory);
		upload.setHeaderEncoding("UTF-8");
		
		File file = null;
		FileInputStream in = null;
		
		try {
			List items = upload.parseRequest(request);
			if (null != items) {
				Iterator itr = items.iterator();
				while (itr.hasNext()) {
					FileItem item = (FileItem) itr.next();
					String uploadName = item.getName();
					if((uploadName.toLowerCase()).endsWith("doc")){
						if (item.isFormField()) {
							continue;
						} else {
							// 以当前精确到秒的日期为上传的文件的文件名
							SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddkkmmss");
							String fileName = sdf.format(new Date());
							// 创建上传文件夹,已时间命名
							file = new File(firePath + File.separator + fileName);
							if(!file.exists()){
								file.mkdirs();
							}
							// 保存word文件
							File savedFile = new File(file, item.getName());
							item.write(savedFile);
							
							in = new FileInputStream(savedFile);
							String content =  getWordAndStyle(in,firePath + File.separator + fileName);
							Gson gosn = new Gson();
							out.print("{'content':" + gosn.toJson(content) + "}");
							// 删除上传文件
							deleteFile(file);
						}
					}else{
						out.print("{'error': '请上传正确的word-2003格式'}");
					}
				}
			}
		} catch (Exception e) {
			e.printStackTrace();
		}

		out.flush();
		out.close();
	}

	/**
	 * 读取每个文字样式
	 * 
	 * @param fileName
	 * @throws Exception
	 */
	public String getWordAndStyle(InputStream in,String path) throws Exception {

		HWPFDocument doc = new HWPFDocument(in);

		// 取得文档中字符的总数  
		int length = doc.characterLength();
		// 创建图片容器  
		PicturesTable pTable = doc.getPicturesTable();

		htmlText = "";
		// 创建临时字符串,好加以判断一串字符是否存在相同格式  
		String tempString = "";

		for (int i = 0; i < length - 1; i++) {
			// 整篇文章的字符通过一个个字符的来判断,range为得到文档的范围  
			Range range = new Range(i, i + 1, doc);
			CharacterRun cr = range.getCharacterRun(0);

			if (pTable.hasPicture(cr)) {
				// 读写图片  
				tempString += this.readPicture(pTable, cr, path);
			} else {

				Range range2 = new Range(i + 1, i + 2, doc);
				// 第二个字符  
				CharacterRun cr2 = range2.getCharacterRun(0);
				// 当前字符  
				char currentChar = cr.text().charAt(0);

				// 判断是否为回车符  
				if (currentChar == ENTER_ASCII)
					tempString += "<br/>";
				// 判断是否为空格符  
				else if (currentChar == SPACE_ASCII)
					tempString += " ";
				// 判断是否为水平制表符  
				else if (currentChar == TABULATION_ASCII)
					tempString += "    ";
				// 比较前后2个字符是否具有相同的格式  
				boolean flag = compareCharStyle(cr, cr2);

				String fontStyle = "<span style='font-family:" + cr.getFontName() + ";font-size:" + cr.getFontSize()/ 2 + "pt;";

				if (cr.isBold())
					fontStyle += "font-weight:bold;";
				if (cr.isItalic())
					fontStyle += "font-style:italic;";

				if (flag && i != length - 2)
					tempString += currentChar;
				else if (!flag) {
					htmlText += fontStyle + "'>" + tempString + currentChar + "</span>";
					tempString = "";
				} else
					htmlText += fontStyle + "'>" + tempString + currentChar + "</span>";
			}
		}
		return htmlText;
	}

	/** 
	 * 读写文档中的图片 
	 *  
	 * @param pTable 
	 * @param cr 
	 * @throws Exception 
	 */
	private String readPicture(PicturesTable pTable, CharacterRun cr, String path)
			throws Exception {
		// 提取图片  
		Picture pic = pTable.extractPicture(cr, false);

		// 返回POI建议的图片文件名  
		String afileName = pic.suggestFullFileName();

		OutputStream out = new FileOutputStream(new File(path + File.separator + afileName));

		pic.writeImageContent(out);
		out.flush();
		out.close();
		
		// 上传图片
		HttpClientHelper.loginApi("", "");
		String token = ContextUtil.getToken(ContextUtil.API_KEY);
		
		String url = domain() + "/rest/images/attachments/json/0/0/0/0/-1/" + token;
		String guid = postUploadImg(url, new File(path + File.separator + afileName));

		return "<img src='" + domain() + "/rest/images/" + guid +"'/>";
	}
	
	/**
	 * 上传图片
	 * @param url 上传图片地址
	 * @param file 图片文件
	 * @return guid
	 */
	private String postUploadImg(String url,File file){
		
		try {
			// 上传图片
			HttpClient httpclient = new DefaultHttpClient();
			HttpPost httppost = new HttpPost(url);

			MultipartEntity reqEntity = new MultipartEntity();
			reqEntity.addPart("files", new FileBody(file));
			httppost.setEntity(reqEntity);

			log.info("执行: " + httppost.getRequestLine());
			HttpResponse response = httpclient.execute(httppost);
			log.info("StatusCode = " + response.getStatusLine().getStatusCode());

			HttpEntity resEntity = response.getEntity();
			String responseText = null;
			if (resEntity != null) {
				log.info("----------------------------------------");
				log.info(response.getStatusLine().toString());
				log.info("返回长度: " + resEntity.getContentLength());
				log.info("返回类型: " + resEntity.getContentType());

				InputStream in = resEntity.getContent();
				log.info("responseText = " + (responseText = HttpClientHelper.getStringByInputStream(in)));
			}
			if (resEntity != null) {
				InputStream is = resEntity.getContent();
				if (is != null) {
					is.close();
				}
			}
			return responseText.substring(responseText.indexOf("photo")+8, responseText.indexOf(",",responseText.indexOf("photo"))-1);
		} catch (ClientProtocolException e) {
			e.printStackTrace();
		} catch (IllegalStateException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return "";
	}

	private boolean compareCharStyle(CharacterRun cr1, CharacterRun cr2) {
		if (cr1.isBold() == cr2.isBold() && cr1.isItalic() == cr2.isItalic()
				&& cr1.getFontName().equals(cr2.getFontName())
				&& cr1.getFontSize() == cr2.getFontSize()) {
			return true;
		}
		return false;
	}

	/**
	 * 读取配置文件中的rest服务器地址
	 * 
	 * @return
	 */
	private String domain() {
		try {
			InitialContext ic = new InitialContext();
			return "http://" + (String) ic.lookup("java:comp/env/API_SITE_DOMAIN");
		} catch (NamingException e) {
			log.error("获取 domain 失败!" + e.getMessage());
		}
		return "";
	}
	
	/**
	 * 删除文件夹
	 * @param file
	 */
	private void deleteFile(File file) {
		File[] files = file.listFiles();
		for (File deleteFile : files) {
			if (deleteFile.isDirectory()) {
				// 如果是文件夹,则递归删除下面的文件后再删除该文件夹
				deleteFile(deleteFile);
			} else {
				deleteFile.delete();
			}
		}
		file.delete();
	}    

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值