Java:从远程(HTTP)或本地读取 Word 文档

package com.daren.poi.word;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;

 

/**
 * Loads a legacy binary Word document (via {@link HWPFDocument}) from an HTTP
 * URL or a local file and exposes its text, its paragraphs and its embedded
 * pictures.
 *
 * <p>Typical use: call one of the {@code init*} methods, read what is needed,
 * then call {@link #destory()} to release the stream and connection.
 */
public class MSWordExtractor
{
    private HWPFDocument msWord;
    private HttpURLConnection connection;
    private InputStream inputStream;

    /**
     * Loads a Word file served over HTTP.
     *
     * <p>Failures are printed to standard error and leave the extractor
     * without a newly loaded document; the accessors then throw
     * {@link IllegalStateException} unless an earlier load succeeded.
     *
     * @param fileurl URL of the Word file
     */
    public void initHttpExtractor(String fileurl)
    {
        try
        {
            URL url = new URL(fileurl);
            connection = (HttpURLConnection) url.openConnection();
            connection.connect();
            inputStream = connection.getInputStream();
            msWord = new HWPFDocument(inputStream);
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }

    /**
     * Loads a Word file from the local file system.
     *
     * <p>Failures are printed to standard error and leave the extractor
     * without a newly loaded document; the accessors then throw
     * {@link IllegalStateException} unless an earlier load succeeded.
     *
     * @param filepath path of the Word file
     */
    public void initLocalExtractor(String filepath)
    {
        try
        {
            inputStream = new FileInputStream(filepath);
            msWord = new HWPFDocument(inputStream);
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }

    /**
     * Releases the input stream and the HTTP connection, if any were opened.
     *
     * <p>The two resources are released independently, so a failure while
     * closing the stream does not prevent the connection from being
     * disconnected. Calling this method more than once is harmless.
     */
    public void destory()
    {
        try
        {
            if (inputStream != null)
            {
                inputStream.close();
            }
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        finally
        {
            inputStream = null;
            if (connection != null)
            {
                connection.disconnect();
                connection = null;
            }
        }
    }

    /**
     * Returns the text of every paragraph in the document.
     *
     * @return one array element per paragraph, in document order
     * @throws IllegalStateException if no document has been loaded
     */
    public String[] getParagraphTexts()
    {
        Range range = requireDocument().getRange();
        int numParagraph = range.numParagraphs();
        String[] paragraphs = new String[numParagraph];
        for (int i = 0; i < numParagraph; i++)
        {
            Paragraph p = range.getParagraph(i);
            paragraphs[i] = p.text();
        }
        return paragraphs;
    }

    /**
     * Returns the full text of the document.
     *
     * @return the text of the document's overall range
     * @throws IllegalStateException if no document has been loaded
     */
    public String getMSWordText()
    {
        return requireDocument().getRange().text();
    }

    /**
     * Saves every embedded picture into the given directory and replaces the
     * text of the character run holding the picture with the picture's file
     * name.
     *
     * <p>Failures (including a missing document) are printed to standard
     * error and stop the extraction; pictures already written stay on disk.
     *
     * @param directory directory the picture files are written to
     */
    public void extractImages(String directory)
    {
        try
        {
            HWPFDocument document = requireDocument();
            PicturesTable pTable = document.getPicturesTable();
            int numCharacterRuns = document.getRange().numCharacterRuns();
            for (int i = 0; i < numCharacterRuns; i++)
            {
                // The range is fetched again on every iteration because
                // replaceText below changes the document while we iterate.
                // NOTE(review): confirm whether a hoisted Range would stay valid.
                CharacterRun characterRun = document.getRange().getCharacterRun(i);
                if (pTable.hasPicture(characterRun))
                {
                    Picture pic = pTable.extractPicture(characterRun, false);
                    String fileName = pic.suggestFullFileName();
                    OutputStream out = new FileOutputStream(new File(directory + File.separator + fileName));
                    try
                    {
                        pic.writeImageContent(out);
                    }
                    finally
                    {
                        // Close each picture file as soon as it is written so
                        // handles do not accumulate across the loop.
                        out.close();
                    }
                    characterRun.replaceText(characterRun.text(), fileName);
                }
            }
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }

    /**
     * Returns the loaded document or fails with a descriptive error.
     *
     * @return the loaded document, never {@code null}
     * @throws IllegalStateException if no document has been loaded
     */
    private HWPFDocument requireDocument()
    {
        if (msWord == null)
        {
            throw new IllegalStateException(
                    "No Word document loaded; call initHttpExtractor or initLocalExtractor successfully first");
        }
        return msWord;
    }

    /**
     * Demo: prints the text of a remote and a local document before and after
     * extracting their pictures.
     *
     * @param args unused
     */
    public static void main(String args[])
    {
        String httpfile = "http://bus.vodone.com:8080/ids/test.doc";
        MSWordExtractor mshttp = new MSWordExtractor();
        try
        {
            mshttp.initHttpExtractor(httpfile);
            System.out.println("[===]\n" + mshttp.getMSWordText());
            mshttp.extractImages("C:\\");
            System.out.println("[===]\n" + mshttp.getMSWordText());
        }
        finally
        {
            mshttp.destory();
        }

        String localfile = "c:\\test.doc";
        MSWordExtractor lochttp = new MSWordExtractor();
        try
        {
            lochttp.initLocalExtractor(localfile);
            System.out.println("[===]\n" + lochttp.getMSWordText());
            lochttp.extractImages("C:\\");
            System.out.println("[===]\n" + lochttp.getMSWordText());
        }
        finally
        {
            // Release the local extractor here; the remote one was already
            // released above.
            lochttp.destory();
        }
    }
}

 

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值