java word转pdf

最近在做一个 Word 转 PDF 预览的功能,不多说,直接贴代码。

先由doc转html

原文:http://blog.csdn.net/fyqcdbdx/article/details/7630122 

package com.keji09.erp.controller;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
  
  
/**
 * Converts a binary Word document (read through Apache POI's HWPF API) into a
 * simple HTML file.
 *
 * <p>The document is walked one character at a time. Consecutive characters
 * that share bold/italic/font name/font size are collected and emitted as one
 * {@code <span>}; tables are rendered up front by {@link #readTable} and
 * spliced in when the walk reaches their start offset; pictures are written to
 * disk and referenced with an {@code <img>} tag.
 *
 * <p>All state is kept in public static fields, so the class is not safe for
 * concurrent use.
 */
public class WordExcelToHtml {

    /** ASCII code of the carriage return character. */
    private static final short ENTER_ASCII = 13;

    /** ASCII code of the space character. */
    private static final short SPACE_ASCII = 32;

    /** ASCII code of the horizontal tab character. */
    private static final short TABULATION_ASCII = 9;

    /** Font family written into every span (choose a font that supports Chinese). */
    private static final String FONT_FAMLIAY = "SimSun";

    /** Initial size of the table bookkeeping arrays; they grow when a document has more tables. */
    private static final int INITIAL_TABLE_CAPACITY = 100;

    /** Charset used to write the HTML file; the same name is declared in the HTML head. */
    private static final String OUTPUT_ENCODING = "UTF-8";

    /** HTML produced by the most recent {@link #getWordAndStyle} call. */
    public static String htmlText = "";
    /** HTML of the table most recently rendered by {@link #readTable}. */
    public static String htmlTextTbl = "";
    /** Index of the last table recorded by {@link #readTable}; -1 when there is none. */
    public static int counter = 0;
    /** Start offset of the table most recently rendered. */
    public static int beginPosi = 0;
    /** End offset of the table most recently rendered. */
    public static int endPosi = 0;
    /** Start offset of each table, indexed 0..counter. */
    public static int beginArray[];
    /** End offset of each table, indexed 0..counter. */
    public static int endArray[];
    /** Rendered HTML of each table, indexed 0..counter. */
    public static String htmlTextArray[];
    /** True once {@link #readTable} has found at least one table. */
    public static boolean tblExist = false;

    public static final String inputFile = "D:\\testword.doc";

    public static void main(String argv[]) {
        try {
            getWordAndStyle(inputFile);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the document character by character, builds the HTML in
     * {@link #htmlText} and writes it out through {@link #writeFile(String)}.
     *
     * @param fileName path of the Word document to convert
     * @throws Exception if the document cannot be read or a picture cannot be written
     */
    public static void getWordAndStyle(String fileName) throws Exception {
        FileInputStream in = new FileInputStream(new File(fileName));
        try {
            HWPFDocument doc = new HWPFDocument(in);

            Range rangetbl = doc.getRange();
            TableIterator it = new TableIterator(rangetbl);

            // Start from a clean slate so that a previous conversion cannot leak
            // its table offsets into this one.
            beginArray = new int[INITIAL_TABLE_CAPACITY];
            endArray = new int[INITIAL_TABLE_CAPACITY];
            htmlTextArray = new String[INITIAL_TABLE_CAPACITY];
            tblExist = false;
            counter = -1;

            // Total number of characters in the document.
            int length = doc.characterLength();
            PicturesTable pTable = doc.getPicturesTable();

            String title = "";
            if (doc.getSummaryInformation() != null
                    && doc.getSummaryInformation().getTitle() != null) {
                title = doc.getSummaryInformation().getTitle();
            }

            StringBuilder html = new StringBuilder();
            html.append("<html><head>")
                    .append("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=")
                    .append(OUTPUT_ENCODING).append("\"/>")
                    .append("<title>").append(escapeHtml(title)).append("</title>")
                    .append("</head><body>");

            if (it.hasNext()) {
                readTable(it, rangetbl);
            }

            // Index of the next table that has not been spliced in yet.
            int cur = 0;
            // Text collected so far that shares one character style.
            StringBuilder pending = new StringBuilder();

            // The loop stops one character early because every iteration also
            // looks at the character that follows the current one.
            for (int i = 0; i < length - 1; i++) {
                if (tblExist && cur <= counter && i == beginArray[cur]) {
                    html.append(pending).append(htmlTextArray[cur]);
                    pending.setLength(0);
                    // Jump to the table end; the loop increment lands on endArray[cur].
                    i = endArray[cur] - 1;
                    cur++;
                    continue;
                }

                Range range = new Range(i, i + 1, doc);
                CharacterRun cr = range.getCharacterRun(0);

                if (pTable.hasPicture(cr)) {
                    html.append(pending);
                    pending.setLength(0);
                    html.append(savePictureAndBuildTag(pTable, cr));
                    continue;
                }

                Range range2 = new Range(i + 1, i + 2, doc);
                CharacterRun cr2 = range2.getCharacterRun(0);
                char c = cr.text().charAt(0);

                if (c == ENTER_ASCII) {
                    pending.append("<br/>");
                } else if (c == SPACE_ASCII) {
                    pending.append(" ");
                } else if (c == TABULATION_ASCII) {
                    pending.append("    ");
                }

                String text = escapeHtml(cr.text());
                if (compareCharStyle(cr, cr2)) {
                    // Same style as the next character: keep collecting.
                    pending.append(text);
                } else {
                    // Style changes after this character: close the current run.
                    html.append(openSpan(cr)).append(pending).append(text).append("</span>");
                    pending.setLength(0);
                }
            }

            html.append(pending).append("</body></html>");
            htmlText = html.toString();
            writeFile(htmlText);
        } finally {
            in.close();
        }
    }

    /**
     * Renders every table delivered by the iterator to HTML and records its
     * start offset, end offset and markup in {@link #beginArray},
     * {@link #endArray} and {@link #htmlTextArray}.
     *
     * @param it       iterator over the tables of the document
     * @param rangetbl range the iterator was created from (not used here)
     * @throws Exception if a table cannot be read
     */
    public static void readTable(TableIterator it, Range rangetbl) throws Exception {
        htmlTextTbl = "";
        counter = -1;

        while (it.hasNext()) {
            tblExist = true;
            Table tb = (Table) it.next();
            beginPosi = tb.getStartOffset();
            endPosi = tb.getEndOffset();

            counter = counter + 1;
            beginArray = grow(beginArray, counter + 1);
            endArray = grow(endArray, counter + 1);
            htmlTextArray = grow(htmlTextArray, counter + 1);
            beginArray[counter] = beginPosi;
            endArray[counter] = endPosi;

            StringBuilder table = new StringBuilder("<table border>");
            for (int i = 0; i < tb.numRows(); i++) {
                TableRow tr = tb.getRow(i);
                table.append("<tr>");
                for (int j = 0; j < tr.numCells(); j++) {
                    TableCell td = tr.getCell(j);
                    // NOTE(review): POI presumably reports this width in twips, not pixels — confirm.
                    int cellWidth = td.getWidth();
                    table.append("<td width=").append(cellWidth).append(">")
                            .append(cellContent(td))
                            .append("</td>");
                }
                table.append("</tr>");
            }
            table.append("</table>");

            htmlTextTbl = table.toString();
            htmlTextArray[counter] = htmlTextTbl;
        }
    }

    /**
     * Writes the picture attached to the character run to disk and appends the
     * matching {@code <img>} tag to {@link #htmlText}.
     *
     * @param pTable picture table of the document
     * @param cr     character run that carries the picture
     * @throws Exception if the picture cannot be extracted or written
     */
    public static void readPicture(PicturesTable pTable, CharacterRun cr) throws Exception {
        htmlText += savePictureAndBuildTag(pTable, cr);
    }

    /**
     * Tells whether two character runs have the same bold flag, italic flag,
     * font name and font size.
     *
     * @param cr1 first character run
     * @param cr2 second character run
     * @return {@code true} when all four properties match
     */
    public static boolean compareCharStyle(CharacterRun cr1, CharacterRun cr2) {
        return cr1.isBold() == cr2.isBold()
                && cr1.isItalic() == cr2.isItalic()
                && sameFontName(cr1.getFontName(), cr2.getFontName())
                && cr1.getFontSize() == cr2.getFontSize();
    }

    /**
     * Writes the given text to {@code d://abc.html}. Failures are printed to
     * standard error and not rethrown.
     *
     * @param s text to write
     */
    public static void writeFile(String s) {
        FileOutputStream fos = null;
        BufferedWriter bw = null;
        try {
            File file = new File("d://abc.html");
            fos = new FileOutputStream(file);
            bw = new BufferedWriter(new OutputStreamWriter(fos, OUTPUT_ENCODING));
            bw.write(s);
        } catch (FileNotFoundException fnfe) {
            fnfe.printStackTrace();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        } finally {
            try {
                if (bw != null) {
                    // Closing the writer flushes it and closes the stream underneath.
                    bw.close();
                } else if (fos != null) {
                    fos.close();
                }
            } catch (IOException ie) {
                // A failed close can mean the buffered text never reached the disk.
                ie.printStackTrace();
            }
        }
    }

    /** Stores the picture of the run under d://test and returns its img tag. */
    private static String savePictureAndBuildTag(PicturesTable pTable, CharacterRun cr)
            throws Exception {
        Picture pic = pTable.extractPicture(cr, false);
        // File name suggested by POI for this picture.
        String afileName = pic.suggestFullFileName();

        // Create the target directory on first use instead of failing on it.
        new File("d://test").mkdirs();

        OutputStream out = new FileOutputStream(new File("d://test" + File.separator + afileName));
        try {
            pic.writeImageContent(out);
        } finally {
            out.close();
        }
        return "<img src=\"d://test//" + afileName + "\" mce_src=\"d://test//" + afileName + "\"/>";
    }

    /** Builds the opening span tag that carries the style of the given run. */
    private static String openSpan(CharacterRun cr) {
        StringBuilder span = new StringBuilder("<span style=\"font-family:");
        span.append(FONT_FAMLIAY).append(";font-size:")
                .append(formatPointSize(cr.getFontSize())).append("pt;");
        if (cr.isBold()) {
            span.append("font-weight:bold;");
        }
        if (cr.isItalic()) {
            span.append("font-style:italic;");
        }
        return span.append("\">").toString();
    }

    /** Converts a size in half points to points, keeping the half (21 becomes "10.5"). */
    private static String formatPointSize(int halfPoints) {
        String whole = String.valueOf(halfPoints / 2);
        return (halfPoints % 2 == 0) ? whole : whole + ".5";
    }

    /** Joins the trimmed paragraphs of one cell with line breaks; a cell without text yields " ". */
    private static String cellContent(TableCell td) {
        StringBuilder content = new StringBuilder();
        boolean hasText = false;
        for (int k = 0; k < td.numParagraphs(); k++) {
            Paragraph para = td.getParagraph(k);
            String s = para.text().trim();
            if (k > 0) {
                content.append("<br/>");
            }
            if (s.length() > 0) {
                hasText = true;
                content.append(escapeHtml(s));
            }
        }
        return hasText ? content.toString() : " ";
    }

    /** Null-safe comparison of two font names. */
    private static boolean sameFontName(String first, String second) {
        return (first == null) ? (second == null) : first.equals(second);
    }

    /** Replaces the characters that have a meaning in HTML markup by their entities. */
    private static String escapeHtml(String text) {
        if (text == null) {
            return "";
        }
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char ch = text.charAt(i);
            switch (ch) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                default:
                    escaped.append(ch);
            }
        }
        return escaped.toString();
    }

    /** Returns an int array with room for at least {@code needed} entries, copying existing ones. */
    private static int[] grow(int[] source, int needed) {
        int current = (source == null) ? 0 : source.length;
        if (current >= needed) {
            return source;
        }
        int[] grown = new int[Math.max(needed, Math.max(current * 2, INITIAL_TABLE_CAPACITY))];
        if (source != null) {
            System.arraycopy(source, 0, grown, 0, current);
        }
        return grown;
    }

    /** Returns a String array with room for at least {@code needed} entries, copying existing ones. */
    private static String[] grow(String[] source, int needed) {
        int current = (source == null) ? 0 : source.length;
        if (current >= needed) {
            return source;
        }
        String[] grown = new String[Math.max(needed, Math.max(current * 2, INITIAL_TABLE_CAPACITY))];
        if (source != null) {
            System.arraycopy(source, 0, grown, 0, current);
        }
        return grown;
    }
}
再由 html 转 pdf(注:下面贴出的 Word2Html 实际上仍是 doc 转 html,只是改用了 POI 自带的 WordToHtmlConverter;html 转 pdf 的代码本文没有给出)

package com.keji09.erp.controller;
      
import org.apache.poi.hwpf.HWPFDocument;  
import org.apache.poi.hwpf.converter.PicturesManager;  
import org.apache.poi.hwpf.converter.WordToHtmlConverter;  
import org.apache.poi.hwpf.usermodel.Picture;  
import org.apache.poi.hwpf.usermodel.PictureType;  
import org.jsoup.Jsoup;   
import org.w3c.dom.Document;  
  
import javax.xml.parsers.DocumentBuilderFactory;  
import javax.xml.parsers.ParserConfigurationException;  
import javax.xml.transform.OutputKeys;  
import javax.xml.transform.Transformer;  
import javax.xml.transform.TransformerException;  
import javax.xml.transform.TransformerFactory;  
import javax.xml.transform.dom.DOMSource;  
import javax.xml.transform.stream.StreamResult;  
import java.io.*;  
import java.util.List;  
      
    /**
     * Converts a binary Word document to an HTML file with Apache POI's
     * {@link WordToHtmlConverter}. The pictures of the document are written to
     * {@code D:/test/} and referenced from the HTML as {@code test/<name>}.
     *
     * <p>The document is loaded with {@link HWPFDocument}, which reads the
     * binary {@code .doc} format only.
     *
     * Created by Carey on 15-2-2.
     */
    public class Word2Html {

        /**
         * Charset of the generated HTML. The serializer output, the decoding of
         * that output and the file writer must all agree on it.
         */
        private static final String HTML_ENCODING = "GBK";

        /** Directory the pictures of the document are written to. */
        private static final String PICTURE_DIR = "D:/test/";

        public static void main(String argv[]) {
            try {
                convert2Html("D:\\testword.doc","D:\\1.html");
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        /**
         * Parses the HTML with jsoup, re-serializes it and writes the result to
         * the given path in GBK. Failures are printed to standard error and not
         * rethrown.
         *
         * @param content HTML text to write
         * @param path    path of the output file
         */
        public static void writeFile(String content, String path) {
            FileOutputStream fos = null;
            BufferedWriter bw = null;
            org.jsoup.nodes.Document doc = Jsoup.parse(content);
            content = doc.html();
            try {
                File file = new File(path);
                fos = new FileOutputStream(file);
                bw = new BufferedWriter(new OutputStreamWriter(fos, HTML_ENCODING));
                bw.write(content);
            } catch (FileNotFoundException fnfe) {
                fnfe.printStackTrace();
            } catch (IOException ioe) {
                ioe.printStackTrace();
            } finally {
                try {
                    if (bw != null) {
                        // Closing the writer flushes it and closes the stream underneath.
                        bw.close();
                    } else if (fos != null) {
                        fos.close();
                    }
                } catch (IOException ie) {
                    // A failed close can mean the buffered text never reached the disk.
                    ie.printStackTrace();
                }
            }
        }

        /**
         * Converts the Word document to HTML, saves its pictures and writes the
         * HTML to the output file.
         *
         * @param fileName   path of the Word document
         * @param outPutFile path of the HTML file to create
         * @throws TransformerException         if the HTML DOM cannot be serialized
         * @throws IOException                  if the document or a picture cannot be read or written
         * @throws ParserConfigurationException if no DOM document builder is available
         */
        public static void convert2Html(String fileName, String outPutFile)
                throws TransformerException, IOException,
                ParserConfigurationException {

            InputStream in = new FileInputStream(fileName);
            try {
                HWPFDocument wordDocument = new HWPFDocument(in);
                WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                        DocumentBuilderFactory.newInstance().newDocumentBuilder()
                                .newDocument());
                // Only supplies the path used in the img tags; the files themselves
                // are written by savePictures below.
                wordToHtmlConverter.setPicturesManager( new PicturesManager()
                    {
                        public String savePicture( byte[] content,
                                                   PictureType pictureType, String suggestedName,
                                                   float widthInches, float heightInches )
                        {
                            return "test/"+suggestedName;
                        }
                    } );
                wordToHtmlConverter.processDocument(wordDocument);

                savePictures(wordDocument);

                Document htmlDocument = wordToHtmlConverter.getDocument();

                ByteArrayOutputStream out = new ByteArrayOutputStream();
                DOMSource domSource = new DOMSource(htmlDocument);
                StreamResult streamResult = new StreamResult(out);

                TransformerFactory tf = TransformerFactory.newInstance();
                Transformer serializer = tf.newTransformer();
                serializer.setOutputProperty(OutputKeys.ENCODING, HTML_ENCODING);
                serializer.setOutputProperty(OutputKeys.INDENT, "yes");
                serializer.setOutputProperty(OutputKeys.METHOD, "HTML");
                serializer.transform(domSource, streamResult);

                // The bytes were produced in HTML_ENCODING, so they must be decoded
                // with the same charset rather than the platform default.
                writeFile(out.toString(HTML_ENCODING), outPutFile);
            } finally {
                in.close();
            }
        }

        /** Writes every picture of the document into PICTURE_DIR under the name POI suggests. */
        private static void savePictures(HWPFDocument wordDocument) throws IOException {
            List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
            if (pics == null || pics.isEmpty()) {
                return;
            }
            // Create the target directory on first use instead of failing on it.
            new File(PICTURE_DIR).mkdirs();
            for (Picture pic : pics) {
                try {
                    OutputStream picOut = new FileOutputStream(PICTURE_DIR
                            + pic.suggestFullFileName());
                    try {
                        pic.writeImageContent(picOut);
                    } finally {
                        picOut.close();
                    }
                } catch (FileNotFoundException e) {
                    // One picture that cannot be created does not abort the conversion.
                    e.printStackTrace();
                }
            }
        }
    }


原文:点击打开链接


最后发现这种方式对 docx 的转换行不通,而且项目里还有 Excel 的转换需求,所以之后还是换成了 OpenOffice。



评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值