由于涉及到当前正在做的一个项目,所以并没有怎么整理就放上来了,相信大部分人都看得懂。至于 POI 中 WordToHtmlConverter 的 processDocument 出错的原因及修改办法,我已在笔记中提及(http://blog.csdn.net/syfyw/article/details/8145653 ),请按笔记修改源码并重新编译即可。
该解决方案还处于未完成状态,希望在android下解析word文档遇到困难的朋友能从中得到灵感.
至于另外一个解决方案( http://blog.csdn.net/paulluo0739/article/details/6611327 ):虽然自定义程度高,但是对文档的还原度很低,字体颜色、大小以及数字表头等基本上都没有还原。建议希望高度还原文档的朋友还是采用 POI 的 WordToHtmlConverter 类吧。
package com.fs.poitest;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.lang.ref.WeakReference;
import java.security.MessageDigest;
import java.util.Iterator;
import java.util.List;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import android.media.ThumbnailUtils;
import android.os.Bundle;
import android.os.Handler;
import android.os.Looper;
import android.os.Message;
import android.annotation.SuppressLint;
import android.app.Activity;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.support.v4.view.PagerAdapter;
import android.support.v4.view.ViewPager;
import android.support.v4.view.ViewPager.OnPageChangeListener;
import android.view.Gravity;
import android.view.Menu;
import android.view.View;
import android.view.ViewGroup;
import android.webkit.WebSettings.ZoomDensity;
import android.webkit.WebView;
import android.widget.BaseAdapter;
import android.widget.GridView;
import android.widget.ImageView;
import android.widget.LinearLayout;
import android.widget.ListView;
import android.widget.TextView;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
 * Activity that displays a Word (.doc) file as a sequence of HTML pages.
 *
 * <p>The document is split into chunks of {@link #range_size} characters. Each
 * chunk is converted to HTML with POI's {@code WordToHtmlConverter} and cached
 * under {@code path_tmp + <md5 of the file>}. A file named {@code finish} in
 * that directory holds the page count and marks the cache as complete, in which
 * case the document is not opened again.
 *
 * <p>Threading: {@link MyDecodeMainThread} converts all pages in order in the
 * background; {@link MyDecodeOnePagerThread} converts the page the user just
 * selected. Conversions are serialized on {@code decode_lock}.
 *
 * <p>NOTE(review): a page that is converted after its WebView has already
 * loaded the (then missing) file is not reloaded automatically; nothing in
 * this class posts to {@link #main_thread_handler} or assigns {@link #web_now}.
 */
@SuppressLint("SdCardPath")
public class MainActivity extends Activity {

    /** Root directory of the on-disk cache of converted pages. */
    private final String path_tmp = "/sdcard/cloudshare/temp/";
    /** Number of document characters converted into one HTML page. */
    private final int range_size = 2000;
    /** Byte offset that is blanked in every generated page (see {@link #patchPageMargin}). */
    private static final int MARGIN_PATCH_OFFSET = 147;
    /** Number of WebViews recycled by {@link WebPagerAdapter}. */
    private static final int WEB_VIEW_POOL_SIZE = 4;

    private ViewPager web_pager;
    private WebPagerAdapter web_pager_adapter;
    public WebView web_now;
    private HWPFDocument doc = null;
    private String file_md5 = null;
    private int doc_len = 0;
    private int doc_pager = 0;

    /** Held by the sequential decode thread for its whole run; onDestroy waits on it. */
    private final Object lock = new Object();
    /** Serializes page conversion and guards decode_status / pictures_exported. */
    private final Object decode_lock = new Object();
    /** Guards updates of is_decode_onepager. */
    private final Object counter_lock = new Object();

    /** Set in onDestroy; volatile because the decode threads poll it. */
    private volatile boolean is_exited = false;
    private boolean has_all_decoded = false;
    /** True once the document's pictures have been written during this session. */
    private boolean pictures_exported = false;

    /** Per page: true when the page is converted or its conversion is in progress. */
    boolean decode_status[] = null;
    /** Number of single-page decode threads that were created and have not finished. */
    volatile int is_decode_onepager = 0;
    /** Handler bound to the thread that created the activity. */
    MyHandler main_thread_handler = new MyHandler(this);

    /**
     * Builds the pager UI, prepares the cache for the hard-coded document and,
     * unless the cache is already complete, starts the background conversion.
     */
    @Override
    public void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        web_pager = new ViewPager(this);
        web_pager_adapter = new WebPagerAdapter();
        setContentView(web_pager);
        String doc_path = "/sdcard/test.doc";
        try {
            prepareDocument(doc_path);
        } catch (Exception e1) {
            e1.printStackTrace();
        }
        web_pager.setAdapter(web_pager_adapter);
        web_pager.setCurrentItem(0);
        web_pager.setOnPageChangeListener(new OnPageChangeListener() {
            @Override
            public void onPageSelected(int arg0) {
                // decode_status stays null when the document could not be opened.
                if (!has_all_decoded && decode_status != null && !decode_status[arg0]) {
                    new MyDecodeOnePagerThread(arg0).start();
                }
            }

            @Override
            public void onPageScrollStateChanged(int arg0) {
            }

            @Override
            public void onPageScrolled(int arg0, float arg1, int arg2) {
            }
        });
        if (!has_all_decoded) {
            new MyDecodeMainThread().start();
        }
    }

    /**
     * Computes the cache key, creates the cache directories and either reads the
     * page count from a complete cache or opens the document for conversion.
     *
     * @param doc_path path of the .doc file
     * @throws IOException if the file cannot be hashed or opened
     */
    private void prepareDocument(String doc_path) throws IOException {
        file_md5 = getMD5(doc_path);
        if (file_md5 == null) {
            // Without a hash there is no cache directory; do not fall back to ".../null".
            throw new FileNotFoundException("cannot read " + doc_path);
        }
        // Determine whether the document has already been fully decoded.
        File finish_file = new File(finishPath());
        has_all_decoded = finish_file.exists();
        makeTempFile();
        File dir_file = new File(path_tmp + file_md5);
        if (!dir_file.exists()) {
            dir_file.mkdirs();
        }
        if (has_all_decoded) {
            int cached_pages = readCachedPageCount(finish_file);
            if (cached_pages > 0) {
                doc_pager = cached_pages;
            } else {
                // Unreadable or corrupt marker: drop it and decode again.
                finish_file.delete();
                has_all_decoded = false;
            }
        }
        if (!has_all_decoded) {
            doc = getDocument(doc_path);
            doc_len = doc.characterLength();
            doc_pager = doc_len / range_size + (doc_len % range_size == 0 ? 0 : 1);
            decode_status = new boolean[doc_pager];
        }
    }

    /**
     * Reads the page count stored in the finish marker.
     *
     * @return the stored count, or -1 if the marker cannot be read or parsed
     */
    private int readCachedPageCount(File finish_file) {
        try {
            return Integer.parseInt(new String(readFile(finish_file), "UTF-8").trim());
        } catch (IOException e) {
            e.printStackTrace();
        } catch (NumberFormatException e) {
            e.printStackTrace();
        }
        return -1;
    }

    @Override
    protected void onDestroy() {
        is_exited = true;
        // Do not destroy the activity until the sequential decode thread has ended.
        synchronized (lock) {
            super.onDestroy();
        }
    }

    @Override
    public boolean onCreateOptionsMenu(Menu menu) {
        getMenuInflater().inflate(R.menu.activity_main, menu);
        return true;
    }

    /** Background thread that converts every page in order. */
    class MyDecodeMainThread extends Thread {
        @Override
        public void run() {
            synchronized (lock) {
                for (int i = 0; i < doc_pager; i++) {
                    try {
                        // Give way to single-page decodes requested by the user.
                        while (is_decode_onepager != 0 && !is_exited) {
                            Thread.sleep(1000);
                        }
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        return;
                    }
                    if (is_exited) {
                        return;
                    }
                    try {
                        doc2html(i);
                    } catch (Exception e) {
                        // One failing page must not prevent the remaining pages.
                        e.printStackTrace();
                    }
                }
                try {
                    // Covers the case where every page was already on disk.
                    markFinishedIfComplete();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Adds {@code delta} to the counter of pending single-page decode threads. */
    private void adjustOnePagerCount(int delta) {
        synchronized (counter_lock) {
            is_decode_onepager += delta;
        }
    }

    /**
     * Thread that converts one page. The pending counter is incremented when the
     * thread object is created and decremented when {@link #run()} ends.
     */
    class MyDecodeOnePagerThread extends Thread {
        private final int pager;

        public MyDecodeOnePagerThread(int _pager) {
            pager = _pager;
            adjustOnePagerCount(1);
        }

        @Override
        public void run() {
            try {
                if (!is_exited) {
                    doc2html(pager);
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                // Always release the counter, otherwise the sequential thread waits forever.
                adjustOnePagerCount(-1);
            }
        }
    }

    /**
     * Handler that, on a message whose "msg" extra is "update", loads the page
     * named by the "pager" extra into {@link MainActivity#web_now}.
     */
    static class MyHandler extends Handler {
        WeakReference<MainActivity> mActivity;

        public MyHandler(MainActivity activity) {
            mActivity = new WeakReference<MainActivity>(activity);
        }

        @Override
        public void handleMessage(Message msg) {
            if ("update".equals(msg.getData().getString("msg"))) {
                MainActivity main = mActivity.get();
                // The activity may be gone, and web_now may never have been set.
                if (main != null && main.web_now != null) {
                    main.web_now.loadUrl(main.pageUrl(msg.getData().getString("pager")));
                }
            }
            super.handleMessage(msg);
        }
    }

    /** Returns the cache path of the HTML file for the given page. */
    private String pagePath(int pager_num) {
        return path_tmp + file_md5 + "/doc" + String.valueOf(pager_num) + ".html";
    }

    /** Returns the file:// URL of the HTML file for the given page label. */
    private String pageUrl(String pager) {
        return "file://" + path_tmp + file_md5 + "/doc" + pager + ".html";
    }

    /** Returns the path of the marker file that records a complete cache. */
    private String finishPath() {
        return path_tmp + file_md5 + "/finish";
    }

    /**
     * Opens the Word document.
     *
     * @throws IOException if the file cannot be read or parsed
     */
    private HWPFDocument getDocument(String file) throws IOException {
        FileInputStream in = new FileInputStream(file);
        try {
            return new HWPFDocument(new POIFSFileSystem(in));
        } finally {
            closeQuietly(in);
        }
    }

    /** Creates the cache root directory, including missing parent directories. */
    public void makeTempFile() {
        try {
            File dirFile = new File(path_tmp);
            if (!dirFile.exists()) {
                dirFile.mkdirs();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Converts one page to HTML and stores it in the cache. Returns immediately
     * if the page file already exists, if the page is flagged in
     * {@code decode_status}, or if no document is open.
     *
     * <p>Calls are serialized: the flag check and update are atomic, and the
     * shared document is never processed by two threads at once.
     *
     * @param pager_num zero-based page index
     * @throws IOException if the page cannot be written
     * @throws ParserConfigurationException if no DOM builder is available
     * @throws TransformerException if the HTML cannot be serialized
     */
    public void doc2html(int pager_num) throws IOException, ParserConfigurationException, TransformerException {
        synchronized (decode_lock) {
            if (doc == null || decode_status == null) {
                return;
            }
            String doc_path = pagePath(pager_num);
            if (new File(doc_path).exists() || decode_status[pager_num]) {
                decode_status[pager_num] = true;
                return; // this page has already been decoded
            }
            decode_status[pager_num] = true;
            boolean written = false;
            try {
                byte content[] = renderPage(pager_num);
                patchPageMargin(content);
                writeFile(doc_path, content);
                written = true;
            } finally {
                if (!written) {
                    // Remove any partial file and allow a later retry.
                    new File(doc_path).delete();
                    decode_status[pager_num] = false;
                }
            }
            markFinishedIfComplete();
        }
    }

    /**
     * Converts the character range of one page to HTML bytes (UTF-8).
     * Must be called with {@code decode_lock} held.
     */
    private byte[] renderPage(int pager_num) throws IOException, ParserConfigurationException, TransformerException {
        int start = pager_num * range_size;
        // The last page is usually shorter than range_size.
        int end = Math.min(start + range_size, doc_len);
        Range range = new Range(start, end, doc);
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            public String savePicture(byte[] content, PictureType pictureType,
                    String suggestedName, float widthInches, float heightInches) {
                return path_tmp + file_md5 + "/img_" + suggestedName;
            }
        });
        wordToHtmlConverter.processDocumentPart(doc, range);
        exportPicturesOnce();

        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(wordToHtmlConverter.getDocument()), new StreamResult(outStream));
        return outStream.toByteArray();
    }

    /**
     * Writes all pictures of the document into the cache directory, once per
     * session. Must be called with {@code decode_lock} held.
     */
    private void exportPicturesOnce() throws IOException {
        if (pictures_exported) {
            return;
        }
        List<Picture> pics = doc.getPicturesTable().getAllPictures();
        if (pics != null) {
            for (Picture p : pics) {
                FileOutputStream out = null;
                try {
                    out = new FileOutputStream(path_tmp + file_md5 + "/img_" + p.suggestFullFileName());
                    p.writeImageContent(out);
                } catch (FileNotFoundException e) {
                    // A picture that cannot be created is skipped.
                    e.printStackTrace();
                } finally {
                    if (out != null) {
                        out.close();
                    }
                }
            }
        }
        pictures_exported = true;
    }

    /**
     * Blanks one byte at a fixed offset of the generated page; the original
     * author's note says this sets the page margin to 0.
     * NOTE(review): this relies on the exact layout of the converter output -
     * confirm against the POI build in use.
     */
    private static void patchPageMargin(byte[] content) {
        if (content.length > MARGIN_PATCH_OFFSET) {
            content[MARGIN_PATCH_OFFSET] = ' ';
        }
    }

    /**
     * Writes the finish marker (the page count) once every page file exists.
     * Does nothing while pages are missing or when there is nothing to cache.
     */
    private void markFinishedIfComplete() throws IOException {
        synchronized (decode_lock) {
            if (file_md5 == null || doc_pager <= 0) {
                return;
            }
            for (int i = 0; i < doc_pager; i++) {
                if (!new File(pagePath(i)).exists()) {
                    return;
                }
            }
            writeFile(finishPath(), String.valueOf(doc_pager).getBytes("UTF-8"));
        }
    }

    /** Reads a whole file into memory. */
    private static byte[] readFile(File file) throws IOException {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        FileInputStream in = new FileInputStream(file);
        try {
            byte[] chunk = new byte[256];
            int n;
            while ((n = in.read(chunk)) != -1) {
                collected.write(chunk, 0, n);
            }
        } finally {
            closeQuietly(in);
        }
        return collected.toByteArray();
    }

    /** Writes {@code data} to {@code path}, replacing any existing file. */
    private static void writeFile(String path, byte[] data) throws IOException {
        FileOutputStream out = new FileOutputStream(path);
        try {
            out.write(data);
        } finally {
            out.close();
        }
    }

    /** Closes the stream if it is non-null; a failure to close is only logged. */
    private static void closeQuietly(FileInputStream in) {
        if (in == null) {
            return;
        }
        try {
            in.close();
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }

    /** Pager adapter that shows each page in one of a small pool of recycled WebViews. */
    private class WebPagerAdapter extends PagerAdapter {
        public WebView web[];

        public WebPagerAdapter() {
            web = new WebView[WEB_VIEW_POOL_SIZE];
            for (int i = 0; i < web.length; i++) {
                web[i] = new WebView(MainActivity.this);
            }
        }

        @Override
        public void destroyItem(View arg0, int arg1, Object arg2) {
            ((ViewPager) arg0).removeView(web[arg1 % WEB_VIEW_POOL_SIZE]);
        }

        @Override
        public int getCount() {
            return doc_pager;
        }

        @Override
        public Object instantiateItem(View arg0, int arg1) {
            int position = arg1 % WEB_VIEW_POOL_SIZE;
            ((ViewPager) arg0).addView(web[position], 0);
            web[position].loadUrl(pageUrl(String.valueOf(arg1)));
            return web[position];
        }

        @Override
        public boolean isViewFromObject(View arg0, Object arg1) {
            return arg0 == arg1;
        }
    }

    static final char hexdigits[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };

    /**
     * Computes the MD5 digest of a file.
     *
     * @param file path of the file to hash
     * @return the digest as lower-case hex, or {@code null} if the file cannot
     *         be read or MD5 is unavailable
     */
    public static String getMD5(String file) {
        FileInputStream fis = null;
        try {
            fis = new FileInputStream(file);
            MessageDigest md = MessageDigest.getInstance("MD5");
            byte[] buffer = new byte[2048];
            int length;
            while ((length = fis.read(buffer)) != -1) {
                md.update(buffer, 0, length);
            }
            return byteToHexString(md.digest());
        } catch (Exception ex) {
            ex.printStackTrace();
            return null;
        } finally {
            // fis is still null when the file could not be opened.
            closeQuietly(fis);
        }
    }

    /** Returns the lower-case hexadecimal form of {@code tmp}, two digits per byte. */
    private static String byteToHexString(byte[] tmp) {
        char str[] = new char[tmp.length * 2];
        int k = 0;
        for (byte byte0 : tmp) {
            str[k++] = hexdigits[(byte0 >>> 4) & 0xf];
            str[k++] = hexdigits[byte0 & 0xf];
        }
        return new String(str);
    }
}