1. 获得Word的文本内容（不含任何格式、样式、图片）
// Extracts the plain text of a .doc file (no formatting, styles or pictures)
// with WordExtractor.getText() and prints it to stdout.
// The stream is opened in try-with-resources so the file handle is released
// on both the success and the failure path; the original never closed it.
// Any failure (missing file, unreadable document, close error) is reported
// through the single catch below, as before.
try (FileInputStream fis = new FileInputStream(new File("F://test.doc"))) {
    WordExtractor ex = new WordExtractor(fis);
    String text2003 = ex.getText();
    System.out.println("word的内容信息"+text2003);
} catch (Exception e) {
    e.printStackTrace();
    System.out.println("解析word有错!");
}
2. 获得word的段落
// Reads a .doc file paragraph by paragraph with WordExtractor.getParagraphText(),
// prints each paragraph, and assembles a simple HTML rendering in which the
// first paragraph is wrapped in <center> and every paragraph ends with <br>.
File file = new File("F://test.doc");
// try-with-resources guarantees the stream is closed even if parsing throws;
// the original leaked the file handle. A failure in close() is an IOException
// and is handled by the existing IOException catch.
try (FileInputStream fis = new FileInputStream(file)) {
    WordExtractor wordExtractor = new WordExtractor(fis);
    // Local, single-threaded buffer: StringBuilder avoids StringBuffer's
    // unnecessary synchronization.
    StringBuilder sb = new StringBuilder();
    // The banner now names the method that is actually called below
    // (the original said getText()).
    System.out.println("【 使用getParagraphText()方法提取的Word文件的内容如下所示:】");
    String[] paragraph = wordExtractor.getParagraphText();
    System.out.println("该Word文件共有"+paragraph.length+"段。");
    for (int i = 0; i < paragraph.length; i++) {
        System.out.println("< 第 "+(i+1)+" 段的内容为 >");
        System.out.println(paragraph[i]);
        if (i == 0) {
            // The first paragraph is emitted centered.
            sb.append("<center>");
            sb.append(paragraph[i]);
            sb.append("</center><br>");
        } else {
            // Every other paragraph is prefixed with a single space.
            sb.append(" ");
            sb.append(paragraph[i]);
            // Diagnostic output: prints the hash code of the paragraph text.
            System.out.println(paragraph[i].hashCode());
            sb.append("<br>");
        }
    }
    System.out.println(sb.toString());
} catch (FileNotFoundException e) {
    e.printStackTrace();
} catch (IOException e) {
    e.printStackTrace();
}
3. 取得Word内容的字体样式（只能一个字符一个字符地取字体样式，效率将是一个大问题）
File file = new File("F://test.doc");
try {
int flg = 0;
String str="";
FileInputStream fis = new FileInputStream(file);
HWPFDocument doc = new HWPFDocument(fis);
StringBuffer sb = new StringBuffer();
sb.append("<center>");
int length = doc.characterLength();
for(int m =0;m<length-1;m++){
Range range = new Range(m,m+1,doc);
for(int j=0;j<range.numCharacterRuns();j++){
CharacterRun cr=range.getCharacterRun(j);
System.out.println(cr.getPicOffset()+" "+cr.getColor()+" "+cr.getFontName()+" "+cr.getFontSize()+" ");
if(range.text().hashCode() != 13){
sb.append("<font color='");
sb.append(cr.getColor());
sb.append("' style='font-size:");
sb.append(cr.getFontSize());
sb.append("pt;font-family:");
sb.append(cr.getFontName());
sb.append("'>");
sb.append(range.text());
sb.append("</font>");
//str = str + "<font color='"+cr.getColor()+"' style='font-size:"+cr.getFontSize()+"pt;font-family:"+cr.getFontName()+"'>"+range.text()+"</font>";
}
else{
if(flg == 0){
sb.append("</center><br> ");//