Word文档的读取,WordToHtml(Android)

由于这段代码取自当前正在做的一个项目,没有怎么整理就放上来了,相信大部分人都看得懂。至于 POI 中 WordToHtmlConverter 的 processDocument 出错的原因及修改办法,我已在笔记中提及(http://blog.csdn.net/syfyw/article/details/8145653),按其中的说明修改源码并重新编译即可。

该解决方案还处于未完成状态,希望在 Android 下解析 Word 文档遇到困难的朋友能从中得到灵感。

至于另外一个解决方案(http://blog.csdn.net/paulluo0739/article/details/6611327),虽然自定义程度高,但对文档的还原度很低,字体颜色、大小以及数字表头等基本上都没有还原。希望能够高度还原文档的朋友,建议还是采用 POI 的 WordToHtmlConverter 类。

package com.fs.poitest;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.lang.ref.WeakReference;
import java.security.MessageDigest;
import java.util.Iterator;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import android.media.ThumbnailUtils;
import android.os.Bundle;
import android.os.Handler;
import android.os.Looper;
import android.os.Message;
import android.annotation.SuppressLint;
import android.app.Activity;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.support.v4.view.PagerAdapter;
import android.support.v4.view.ViewPager;
import android.support.v4.view.ViewPager.OnPageChangeListener;
import android.view.Gravity;
import android.view.Menu;
import android.view.View;
import android.view.ViewGroup;
import android.webkit.WebSettings.ZoomDensity;
import android.webkit.WebView;
import android.widget.BaseAdapter;
import android.widget.GridView;
import android.widget.ImageView;
import android.widget.LinearLayout;
import android.widget.ListView;
import android.widget.TextView;

import org.apache.poi.hwpf.HWPFDocument;

import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

@SuppressLint("SdCardPath")
public class MainActivity extends Activity {
	// Pager widget used as the activity's content view; shows one WebView per page.
	private ViewPager web_pager;
	// Adapter that hands the pooled WebViews to web_pager.
	private WebPagerAdapter web_pager_adapter;
	// WebView reloaded by MyHandler on an "update" message.
	// NOTE(review): never assigned anywhere in the visible code - confirm where it is set.
	public WebView web_now;
	// Number of document characters converted into one HTML page.
	private final int range_size = 2000;
	// Parsed Word document; only opened when the cache is not yet complete.
	private HWPFDocument doc = null;
	// MD5 hex string of the source file, used as the name of its cache directory.
	private String file_md5 = null;
	// Value of doc.characterLength(), set in onCreate.
	private int doc_len = 0;
	// Number of pages: derived from doc_len, or read back from the "finish" marker file.
	private int doc_pager = 0;
	// Monitor held by MyDecodeMainThread for its whole run and acquired by onDestroy.
	private String lock = new String();
	// Set in onDestroy; polled by MyDecodeMainThread to stop early.
	private boolean is_exited = false;
	// True when the "finish" marker file exists for this document.
	private boolean has_all_decoded = false;
	
	/**
	 * Builds the pager UI, then either restores the page count from the cache
	 * of a fully decoded document or opens the document and starts decoding it
	 * in the background.
	 *
	 * @param savedInstanceState state bundle passed through to the superclass
	 */
	@Override
	public void onCreate(Bundle savedInstanceState) {
		super.onCreate(savedInstanceState);
		web_pager = new ViewPager(this);
		web_pager_adapter = new WebPagerAdapter();
		setContentView(web_pager);
		
		String doc_path = "/sdcard/test.doc";
		
		try {
			file_md5 = getMD5(doc_path);
			// Without a digest there is no cache directory name; do not fall
			// through and create a ".../null/" directory.
			if (file_md5 == null)
				throw new FileNotFoundException("cannot read " + doc_path);
			
			// The "finish" marker tells whether every page was already decoded.
			File finish_file = new File(path_tmp + file_md5 + "/finish");
			has_all_decoded = finish_file.exists();
			
			makeTempFile();
			File dir_file = new File(path_tmp + file_md5);
			if (!dir_file.exists())
				dir_file.mkdir();
			
			if (has_all_decoded) {
				try {
					doc_pager = readPagerCount(finish_file);
				} catch (Exception e) {
					// An unreadable marker must not leave the pager empty:
					// fall back to decoding from the document itself.
					e.printStackTrace();
					has_all_decoded = false;
				}
			}
			if (!has_all_decoded) {
				doc = getDocument(doc_path);
				doc_len = doc.characterLength();
				doc_pager = doc_len/range_size + (doc_len%range_size==0?0:1);
				decode_status = new boolean[doc_pager];
			}
		} catch (Exception e1) {
			e1.printStackTrace();
		}
		
		web_pager.setAdapter(web_pager_adapter);
		web_pager.setCurrentItem(0);
		web_pager.setOnPageChangeListener(new OnPageChangeListener() {
			@Override
			public void onPageSelected(int arg0) {
				// Decode the page the user jumped to right away instead of
				// waiting for the sequential background thread to reach it.
				if (!has_all_decoded && decode_status != null && !decode_status[arg0])
					new MyDecodeOnePagerThread(arg0).start();
			}
			@Override
			public void onPageScrollStateChanged(int arg0) {}
			@Override
			public void onPageScrolled(int arg0, float arg1, int arg2) {}
		});
		
		if(!has_all_decoded)
			new MyDecodeMainThread().start();
	}
	
	/**
	 * Reads the page count stored in the "finish" marker file.
	 *
	 * @param finish_file the marker file
	 * @return the page count stored in the file
	 * @throws IOException if the file cannot be read
	 * @throws NumberFormatException if the content is not an integer
	 */
	private int readPagerCount(File finish_file) throws IOException {
		FileInputStream in = new FileInputStream(finish_file);
		try {
			// Loop until EOF: a single read() may return fewer bytes than requested.
			ByteArrayOutputStream collected = new ByteArrayOutputStream();
			byte[] chunk = new byte[64];
			int n;
			while ((n = in.read(chunk)) != -1)
				collected.write(chunk, 0, n);
			return Integer.parseInt(collected.toString("UTF-8").trim());
		} finally {
			in.close();
		}
	}
	
	/**
	 * Flags the activity as exited and then destroys it while holding
	 * {@code lock}.
	 *
	 * MyDecodeMainThread holds the same monitor for its whole run and checks
	 * {@code is_exited} before each page, so the superclass teardown only runs
	 * once that thread has left its synchronized block.
	 * NOTE(review): this blocks the calling thread while the decode thread
	 * still holds the lock, and is_exited is not volatile - confirm this is
	 * acceptable.
	 */
	@Override  
   	protected void onDestroy() {
		is_exited = true;
		// Do not destroy the activity until the main decode thread is known to have finished.
		synchronized (lock) {
			super.onDestroy();
		}
   	}
	
	/**
	 * Inflates the R.menu.activity_main resource into the options menu.
	 *
	 * @param menu the menu to populate
	 * @return always true
	 */
	@Override
	public boolean onCreateOptionsMenu(Menu menu) {
		getMenuInflater().inflate(R.menu.activity_main, menu);
		return true;
	}
	
	// One flag per page; set by doc2html once a page has been claimed or found on disk.
	boolean decode_status[] = null;

	/**
	 * Background thread that converts every page in order, from 0 to
	 * doc_pager - 1, by calling doc2html.
	 *
	 * It holds {@code lock} for its whole run so that onDestroy, which takes
	 * the same lock, waits for it. While single-page threads are active
	 * (is_decode_onepager != 0) it pauses so that the page the user is looking
	 * at is decoded first.
	 */
	class MyDecodeMainThread extends Thread {
		@Override
		public void run() {
			synchronized (lock) {
				for (int i = 0; i < doc_pager; i++) {
					try {
						// Also leave the wait loop on exit, otherwise onDestroy
						// stays blocked on the lock until the single-page
						// threads have finished.
						while (is_decode_onepager != 0 && !is_exited)
							Thread.sleep(1000);
					} catch (InterruptedException e) {
						// Keep the interrupt visible to whoever owns this thread.
						Thread.currentThread().interrupt();
						return;
					}
					
					if (is_exited)
						return;

					try {
						doc2html(i);
					} catch (Exception e) {
						// A page that fails to convert must not stop the
						// remaining pages from being decoded.
						e.printStackTrace();
					}
				}
			}
		}
	}
	
	// Number of single-page decode threads that were created and have not
	// finished yet. Volatile because MyDecodeMainThread polls it from another thread.
	volatile int is_decode_onepager = 0;
	// Guards the read-modify-write updates of is_decode_onepager.
	private final Object onepager_count_lock = new Object();

	/**
	 * Thread that converts exactly one page by calling doc2html.
	 *
	 * The counter is incremented in the constructor and decremented when run()
	 * ends, so an instance that is created must also be started.
	 */
	class MyDecodeOnePagerThread extends Thread {
		private final int pager;

		/**
		 * @param _pager zero-based index of the page to decode
		 */
		public MyDecodeOnePagerThread(int _pager){
			pager = _pager;
			synchronized (onepager_count_lock) {
				is_decode_onepager++;
			}
		}

		@Override
		public void run() {
			try {
				doc2html(pager);
			} catch (Exception e) {
				e.printStackTrace();
			} finally {
				// Decrement in finally: if an Error escaped, the counter would
				// stay above zero and MyDecodeMainThread would wait forever.
				synchronized (onepager_count_lock) {
					is_decode_onepager--;
				}
			}
		}
	}
	
	// Message handler created together with the activity.
	MyHandler main_thread_handler = new MyHandler(this);
	
	/**
	 * Handler that reloads the current WebView when it receives a message
	 * whose bundle has "msg" = "update"; the bundle's "pager" string selects
	 * the page file to load.
	 *
	 * The activity is held through a WeakReference, so the handler does not
	 * keep it alive.
	 */
	static class MyHandler extends Handler {
		WeakReference<MainActivity> mActivity;

		/**
		 * @param activity the activity whose web_now view is reloaded
		 */
		public MyHandler(MainActivity activity) {
			mActivity = new WeakReference<MainActivity>(activity);
		}

		@Override
		public void handleMessage(Message msg) {
			// Constant-first equals: a message without a "msg" entry is ignored
			// instead of raising a NullPointerException.
			if ("update".equals(msg.getData().getString("msg"))) {
				MainActivity main = mActivity.get();
				// The activity may already be collected, and web_now may not
				// have been set yet; in both cases there is nothing to reload.
				if (main != null && main.web_now != null) {
					String url = "file://" + main.path_tmp + main.file_md5 + "/doc" + msg.getData().getString("pager") + ".html";
					main.web_now.loadUrl(url);
				}
			}
			super.handleMessage(msg);
		}
	}
	
	/**
	 * Opens the file at the given path and parses it as a Word document.
	 *
	 * @param file path of the .doc file
	 * @return the parsed document
	 * @throws IOException if the file cannot be opened or parsed
	 */
	private HWPFDocument getDocument(String file) throws IOException {
		FileInputStream rawInput = new FileInputStream(file);
		POIFSFileSystem container = new POIFSFileSystem(rawInput);
		return new HWPFDocument(container);
	}
	
	/**
	 * Makes sure the cache root directory {@code path_tmp} exists.
	 *
	 * Uses mkdirs() because path_tmp is a nested path: mkdir() fails when the
	 * parent directory does not exist yet. Failures are reported on System.err
	 * and not thrown, so callers keep their existing control flow.
	 */
	public void makeTempFile() {
		File dirFile = new File(path_tmp);
		try {
			if (dirFile.isDirectory())
				return;
			// Re-check after a failed mkdirs(): another thread may have
			// created the directory in the meantime.
			if (!dirFile.mkdirs() && !dirFile.isDirectory())
				System.err.println("makeTempFile: cannot create directory " + path_tmp);
		} catch (SecurityException e) {
			e.printStackTrace();
		}
	}
	
	// Root directory of the on-disk cache; each document gets a sub-directory named after its MD5.
	private final String path_tmp = "/sdcard/cloudshare/temp/";

	/**
	 * Converts one page of the document to an HTML file in the cache directory.
	 *
	 * Returns immediately when the page file already exists or the page has
	 * already been claimed. If the conversion fails, the claim is released and
	 * any partial file is removed so the page can be retried. After a
	 * successful conversion the "finish" marker is written once every page
	 * file is present.
	 *
	 * @param pager_num zero-based page index, below doc_pager
	 * @throws IOException if a picture or the page file cannot be written
	 * @throws ParserConfigurationException if no DOM builder can be created
	 * @throws TransformerException if the HTML cannot be serialized
	 */
	public void doc2html(int pager_num) throws IOException, ParserConfigurationException, TransformerException {
		String doc_path = path_tmp + file_md5 + "/doc" + String.valueOf(pager_num) + ".html";
		File doc_file = new File(doc_path);
		if(doc_file.exists()||decode_status[pager_num]) {
			decode_status[pager_num] = true;
			return;			// the page is already decoded (or being decoded)
		}
		
		// Claim the page before the slow work so a second caller skips it.
		decode_status[pager_num] = true;
		boolean written = false;
		try {
			writeBytes(doc_path, renderPagerHtml(pager_num));
			written = true;
		} finally {
			if (!written) {
				// Release the claim and drop any partial file, otherwise a
				// failed page would be treated as decoded forever.
				decode_status[pager_num] = false;
				doc_file.delete();
			}
		}
		
		// Pages can be decoded out of order, so the marker is only written
		// when every page file exists, not merely when the last page is done.
		if (allPagersOnDisk()) {
			String finish_file_path = path_tmp + file_md5 + "/finish";
			writeBytes(finish_file_path, String.valueOf(doc_pager).getBytes());
		}
	}
	
	/** Renders the given page of {@code doc} to HTML bytes and writes the document's pictures to the cache. */
	private byte[] renderPagerHtml(int pager_num) throws IOException, ParserConfigurationException, TransformerException {
		int range_start = pager_num * range_size;
		// The last page is usually shorter than range_size; do not run past the document.
		int range_end = Math.min(range_start + range_size, doc_len);
		Range range = new Range(range_start, range_end, doc);
		
		WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
				DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
		wordToHtmlConverter.setPicturesManager(new PicturesManager() {
			public String savePicture(byte[] content, PictureType pictureType,
					String suggestedName, float widthInches, float heightInches) {
				// Only the link target is returned here; savePictures() writes the files.
				return path_tmp + file_md5 + "/img_" + suggestedName;
			}
		});
		wordToHtmlConverter.processDocumentPart(doc, range);
		savePictures();
		
		ByteArrayOutputStream outStream = new ByteArrayOutputStream();
		Transformer serializer = TransformerFactory.newInstance().newTransformer();
		serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
		serializer.setOutputProperty(OutputKeys.INDENT, "yes");
		serializer.setOutputProperty(OutputKeys.METHOD, "html");
		serializer.transform(new DOMSource(wordToHtmlConverter.getDocument()), new StreamResult(outStream));
		
		byte content[] = outStream.toByteArray();
		// Sets the page margin to 0 by blanking one byte of the generated style.
		// NOTE(review): offset 147 depends on the exact converter output - confirm
		// it still hits the intended character after any POI change.
		if (content.length > 147)
			content[147] = ' ';
		return content;
	}
	
	/** Writes every picture of the document into the cache directory, closing each stream. */
	private void savePictures() throws IOException {
		List<Picture> pics = doc.getPicturesTable().getAllPictures();
		if (pics == null)
			return;
		for (Picture p : pics) {
			try {
				FileOutputStream out = new FileOutputStream(path_tmp + file_md5 + "/img_" + p.suggestFullFileName());
				try {
					p.writeImageContent(out);
				} finally {
					out.close();
				}
			} catch (FileNotFoundException e) {
				// A picture that cannot be created is skipped; the page is still produced.
				e.printStackTrace();
			}
		}
	}
	
	/** Returns true when the HTML file of every page exists in the cache directory. */
	private boolean allPagersOnDisk() {
		// Scan from the end: during sequential decoding the last pages are the
		// missing ones, so the loop normally stops after one check.
		for (int i = doc_pager - 1; i >= 0; i--) {
			if (!new File(path_tmp + file_md5 + "/doc" + i + ".html").exists())
				return false;
		}
		return true;
	}
	
	/** Writes the bytes to the given path, replacing existing content, and always closes the stream. */
	private static void writeBytes(String path, byte[] data) throws IOException {
		FileOutputStream out = new FileOutputStream(path);
		try {
			out.write(data);
		} finally {
			out.close();
		}
	}
	
	// Custom PagerAdapter that serves the pages from a fixed pool of four WebViews,
	// reusing the view at index (page position % 4).
	private class WebPagerAdapter extends PagerAdapter {
		public WebView web[];

		/** Creates the four pooled WebViews. */
		public WebPagerAdapter() {
			web = new WebView[4];
			for (int slot = 0; slot < 4; slot++) {
				web[slot] = new WebView(MainActivity.this);
			}
		}

		/** Detaches the pooled WebView that was showing the given position. */
		public void destroyItem(View arg0, int arg1, Object arg2) {
			ViewPager host = (ViewPager) arg0;
			WebView recycled = web[arg1 % 4];
			host.removeView(recycled);
		}

		/** The number of pages equals the document's page count. */
		public int getCount() {
			return doc_pager;
		}

		/** Attaches the pooled WebView for this position and points it at the page's HTML file. */
		public Object instantiateItem(View arg0, int arg1) {
			final int slot = arg1 % 4;
			ViewPager host = (ViewPager) arg0;
			WebView page = web[slot];
			host.addView(page, 0);

			page.loadUrl("file://" + path_tmp + file_md5 + "/doc" + arg1 + ".html");
			return page;
		}

		/** The object returned by instantiateItem is the view itself. */
		public boolean isViewFromObject(View arg0, Object arg1) {
			return arg0 == arg1;
		}
	}
	
	
	// Lower-case hexadecimal digits, indexed by nibble value.
	static char hexdigits[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };

	/**
	 * Computes the MD5 digest of a file.
	 *
	 * @param file path of the file to hash
	 * @return the digest as 32 lower-case hex characters, or null when the
	 *         file cannot be read or MD5 is unavailable
	 */
	public static String getMD5(String file) {
		FileInputStream fis = null;
		try {
			fis = new FileInputStream(file);
			MessageDigest md = MessageDigest.getInstance("MD5");
			byte[] buffer = new byte[2048];
			int length;
			while ((length = fis.read(buffer)) != -1) {
				md.update(buffer, 0, length);
			}
			return byteToHexString(md.digest());
		} catch (Exception ex) {
			ex.printStackTrace();
			return null;
		} finally {
			// fis is still null when the file could not be opened; closing it
			// unconditionally would throw from finally and mask the null result.
			if (fis != null) {
				try {
					fis.close();
				} catch (IOException ex) {
					ex.printStackTrace();
				}
			}
		}
	}

	/**
	 * Formats bytes as lower-case hexadecimal, two characters per byte.
	 *
	 * @param tmp the bytes to format
	 * @return the hex string, twice as long as the input
	 */
	private static String byteToHexString(byte[] tmp) {
		char str[] = new char[tmp.length * 2];
		int k = 0;
		for (int i = 0; i < tmp.length; i++) {
			byte byte0 = tmp[i];
			// Mask after the shift: the byte is sign-extended to int first.
			str[k++] = hexdigits[byte0 >>> 4 & 0xf];
			str[k++] = hexdigits[byte0 & 0xf];
		}
		return new String(str);
	}
}


 

转载于:https://my.oschina.net/gal/blog/200192

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值