import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
/**
 * Cross-references records loaded from Excel sheets against the text of a Word report.
 *
 * Every spreadsheet row contributes a key (column 0) and a comma-separated value string
 * (column 1). For each record the first value, with "电力" removed, is searched for in the
 * report text; the remaining values are then looked up in a short window of text following
 * each hit. Records for which at least half of the values were found are printed.
 */
public class Electronic {

    /** Directory that holds the Word report. */
    private static final String FILE_DIR = "/home/work";

    /** Spreadsheets whose "Sheet1" rows are loaded as key/value records. */
    private static final String[] MATERIAL_FILES = {
        "/home/work/1-095808.xls", "/home/work/1-095823.xls", "/home/work/1-095801.xls"
    };

    /** Number of characters, counted from the start of a hit, inspected for further values. */
    private static final int WINDOW = 10;

    /** Minimum fraction of a record's values that must be found for the record to be reported. */
    private static final double MIN_MATCH_RATIO = 0.5;

    /**
     * Loads the report and the spreadsheets, matches every record against the report text and
     * prints the records that reach the match ratio as {@code key:value,value,...}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            String reportText = readReportText(FILE_DIR + "/报告.doc");
            Map<String, String> cells = getMaterial(MATERIAL_FILES);
            System.out.println("Total Entry:" + cells.size());

            Map<String, ArrayList<String>> resultMap = new HashMap<String, ArrayList<String>>();
            int count = 0;
            for (Map.Entry<String, String> entry : cells.entrySet()) {
                String[] values = entry.getValue().replace("AC", "").split(",");
                // split() returns an empty array for inputs such as ","; such a record has
                // nothing to search for and must not abort the whole run.
                if (values.length > 0) {
                    ArrayList<String> matched = collectMatches(reportText, values, WINDOW);
                    if ((double) matched.size() / values.length >= MIN_MATCH_RATIO) {
                        resultMap.put(entry.getKey(), matched);
                    }
                }
                count++;
                if (count % 1000 == 0) {
                    System.out.println("Processed Records:" + count);
                }
            }

            for (Map.Entry<String, ArrayList<String>> entry : resultMap.entrySet()) {
                System.out.print(entry.getKey() + ":");
                for (String str : entry.getValue()) {
                    System.out.print(str + ",");
                }
                System.out.println();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads all rows (starting at row index 1) of sheet "Sheet1" from each workbook and
     * collects column 0 as key and column 1 as value. Later rows overwrite earlier rows with
     * the same key. A workbook that cannot be read is reported on stderr and skipped; the
     * remaining workbooks are still processed.
     *
     * @param filename paths of the .xls workbooks to read; {@code null} yields an empty map
     * @return map from the key cell to the value cell of every readable row, never {@code null}
     */
    public static Map<String, String> getMaterial(String[] filename) {
        Map<String, String> cells = new HashMap<String, String>();
        if (filename == null) {
            return cells;
        }
        for (int i = 0; i < filename.length; i++) {
            System.out.println("FileName is:" + filename[i]);
            try {
                loadSheet(filename[i], cells);
            } catch (Exception e) {
                // Best effort: report the failure instead of swallowing it, then carry on.
                System.err.println("Failed to read " + filename[i] + ": " + e);
            }
        }
        return cells;
    }

    /** Extracts the plain text of the Word document at {@code path} and closes the stream. */
    private static String readReportText(String path) throws IOException {
        FileInputStream in = new FileInputStream(path);
        try {
            return new WordExtractor(in).getText();
        } finally {
            in.close();
        }
    }

    /** Adds the key/value rows of "Sheet1" in the workbook at {@code path} to {@code cells}. */
    private static void loadSheet(String path, Map<String, String> cells) throws IOException {
        FileInputStream in = new FileInputStream(path);
        try {
            HSSFWorkbook workbook = new HSSFWorkbook(in);
            HSSFSheet sheet = workbook.getSheet("Sheet1");
            if (sheet == null) {
                System.err.println("No sheet named Sheet1 in " + path);
                return;
            }
            int lastRow = sheet.getLastRowNum();
            System.out.println("Total records:" + lastRow);
            // int counter: a short would overflow on sheets with more than 32767 rows and the
            // loop would never terminate.
            for (int j = 1; j <= lastRow; j++) {
                if (j % 2000 == 0) {
                    System.out.println("Maprecords:" + cells.size());
                }
                HSSFRow row = sheet.getRow(j);
                if (row == null) {
                    continue;
                }
                HSSFCell keyCell = row.getCell(0);
                HSSFCell valueCell = row.getCell(1);
                if (keyCell == null || valueCell == null) {
                    System.err.println("Skipping row " + j + " with missing cell in " + path);
                    continue;
                }
                try {
                    cells.put(keyCell.getStringCellValue(), valueCell.getStringCellValue());
                } catch (RuntimeException e) {
                    // e.g. a non-text cell; skip the row but keep reading the sheet.
                    e.printStackTrace();
                    System.out.println(path);
                }
            }
        } finally {
            in.close();
        }
    }

    /**
     * Finds which of {@code values} occur in {@code text}.
     *
     * The search string is {@code values[0]} with every "电力" removed. If it occurs at least
     * once, {@code values[0]} itself is recorded; then, for every occurrence, each further
     * non-empty value that appears within the {@code window} characters starting at the
     * occurrence is recorded once. The window only extends forward from the hit; text before
     * the hit is not inspected.
     *
     * @return the recorded values in discovery order; empty when the search string is empty
     *         or does not occur
     */
    private static ArrayList<String> collectMatches(String text, String[] values, int window) {
        ArrayList<String> matched = new ArrayList<String>();
        if (values.length == 0) {
            return matched;
        }
        String needle = values[0].replace("电力", "");
        if (needle.isEmpty()) {
            // An empty needle matches everywhere and would never advance.
            return matched;
        }
        // indexOf reports "not found" as -1; offset 0 is a valid hit.
        int index = text.indexOf(needle);
        if (index >= 0) {
            matched.add(values[0]);
        }
        while (index >= 0) {
            int end = Math.min(index + window, text.length());
            String context = text.substring(index, end);
            for (int i = 1; i < values.length; i++) {
                String candidate = values[i];
                if (candidate.isEmpty()) {
                    continue;
                }
                if (context.contains(candidate) && !matched.contains(candidate)) {
                    matched.add(candidate);
                }
            }
            // Advance by the length of what was actually matched, so no text is skipped and
            // the start offset can never run past the end of the document.
            index = text.indexOf(needle, index + needle.length());
        }
        return matched;
    }
}
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
/**
 * Cross-references records loaded from Excel sheets against the text of a Word report.
 *
 * Every spreadsheet row contributes a key (column 0) and a comma-separated value string
 * (column 1). For each record the first value, with "电力" removed, is searched for in the
 * report text; the remaining values are then looked up in a short window of text following
 * each hit. Records for which at least half of the values were found are printed.
 */
public class Electronic {

    /** Directory that holds the Word report. */
    private static final String FILE_DIR = "/home/work";

    /** Spreadsheets whose "Sheet1" rows are loaded as key/value records. */
    private static final String[] MATERIAL_FILES = {
        "/home/work/1-095808.xls", "/home/work/1-095823.xls", "/home/work/1-095801.xls"
    };

    /** Number of characters, counted from the start of a hit, inspected for further values. */
    private static final int WINDOW = 10;

    /** Minimum fraction of a record's values that must be found for the record to be reported. */
    private static final double MIN_MATCH_RATIO = 0.5;

    /**
     * Loads the report and the spreadsheets, matches every record against the report text and
     * prints the records that reach the match ratio as {@code key:value,value,...}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            String reportText = readReportText(FILE_DIR + "/报告.doc");
            Map<String, String> cells = getMaterial(MATERIAL_FILES);
            System.out.println("Total Entry:" + cells.size());

            Map<String, ArrayList<String>> resultMap = new HashMap<String, ArrayList<String>>();
            int count = 0;
            for (Map.Entry<String, String> entry : cells.entrySet()) {
                String[] values = entry.getValue().replace("AC", "").split(",");
                // split() returns an empty array for inputs such as ","; such a record has
                // nothing to search for and must not abort the whole run.
                if (values.length > 0) {
                    ArrayList<String> matched = collectMatches(reportText, values, WINDOW);
                    if ((double) matched.size() / values.length >= MIN_MATCH_RATIO) {
                        resultMap.put(entry.getKey(), matched);
                    }
                }
                count++;
                if (count % 1000 == 0) {
                    System.out.println("Processed Records:" + count);
                }
            }

            for (Map.Entry<String, ArrayList<String>> entry : resultMap.entrySet()) {
                System.out.print(entry.getKey() + ":");
                for (String str : entry.getValue()) {
                    System.out.print(str + ",");
                }
                System.out.println();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads all rows (starting at row index 1) of sheet "Sheet1" from each workbook and
     * collects column 0 as key and column 1 as value. Later rows overwrite earlier rows with
     * the same key. A workbook that cannot be read is reported on stderr and skipped; the
     * remaining workbooks are still processed.
     *
     * @param filename paths of the .xls workbooks to read; {@code null} yields an empty map
     * @return map from the key cell to the value cell of every readable row, never {@code null}
     */
    public static Map<String, String> getMaterial(String[] filename) {
        Map<String, String> cells = new HashMap<String, String>();
        if (filename == null) {
            return cells;
        }
        for (int i = 0; i < filename.length; i++) {
            System.out.println("FileName is:" + filename[i]);
            try {
                loadSheet(filename[i], cells);
            } catch (Exception e) {
                // Best effort: report the failure instead of swallowing it, then carry on.
                System.err.println("Failed to read " + filename[i] + ": " + e);
            }
        }
        return cells;
    }

    /** Extracts the plain text of the Word document at {@code path} and closes the stream. */
    private static String readReportText(String path) throws IOException {
        FileInputStream in = new FileInputStream(path);
        try {
            return new WordExtractor(in).getText();
        } finally {
            in.close();
        }
    }

    /** Adds the key/value rows of "Sheet1" in the workbook at {@code path} to {@code cells}. */
    private static void loadSheet(String path, Map<String, String> cells) throws IOException {
        FileInputStream in = new FileInputStream(path);
        try {
            HSSFWorkbook workbook = new HSSFWorkbook(in);
            HSSFSheet sheet = workbook.getSheet("Sheet1");
            if (sheet == null) {
                System.err.println("No sheet named Sheet1 in " + path);
                return;
            }
            int lastRow = sheet.getLastRowNum();
            System.out.println("Total records:" + lastRow);
            // int counter: a short would overflow on sheets with more than 32767 rows and the
            // loop would never terminate.
            for (int j = 1; j <= lastRow; j++) {
                if (j % 2000 == 0) {
                    System.out.println("Maprecords:" + cells.size());
                }
                HSSFRow row = sheet.getRow(j);
                if (row == null) {
                    continue;
                }
                HSSFCell keyCell = row.getCell(0);
                HSSFCell valueCell = row.getCell(1);
                if (keyCell == null || valueCell == null) {
                    System.err.println("Skipping row " + j + " with missing cell in " + path);
                    continue;
                }
                try {
                    cells.put(keyCell.getStringCellValue(), valueCell.getStringCellValue());
                } catch (RuntimeException e) {
                    // e.g. a non-text cell; skip the row but keep reading the sheet.
                    e.printStackTrace();
                    System.out.println(path);
                }
            }
        } finally {
            in.close();
        }
    }

    /**
     * Finds which of {@code values} occur in {@code text}.
     *
     * The search string is {@code values[0]} with every "电力" removed. If it occurs at least
     * once, {@code values[0]} itself is recorded; then, for every occurrence, each further
     * non-empty value that appears within the {@code window} characters starting at the
     * occurrence is recorded once. The window only extends forward from the hit; text before
     * the hit is not inspected.
     *
     * @return the recorded values in discovery order; empty when the search string is empty
     *         or does not occur
     */
    private static ArrayList<String> collectMatches(String text, String[] values, int window) {
        ArrayList<String> matched = new ArrayList<String>();
        if (values.length == 0) {
            return matched;
        }
        String needle = values[0].replace("电力", "");
        if (needle.isEmpty()) {
            // An empty needle matches everywhere and would never advance.
            return matched;
        }
        // indexOf reports "not found" as -1; offset 0 is a valid hit.
        int index = text.indexOf(needle);
        if (index >= 0) {
            matched.add(values[0]);
        }
        while (index >= 0) {
            int end = Math.min(index + window, text.length());
            String context = text.substring(index, end);
            for (int i = 1; i < values.length; i++) {
                String candidate = values[i];
                if (candidate.isEmpty()) {
                    continue;
                }
                if (context.contains(candidate) && !matched.contains(candidate)) {
                    matched.add(candidate);
                }
            }
            // Advance by the length of what was actually matched, so no text is skipped and
            // the start offset can never run past the end of the document.
            index = text.indexOf(needle, index + needle.length());
        }
        return matched;
    }
}
使用的 jar 包来自 poi-bin-3.10-FINAL-20140208.tar.gz。
使用时需要先解压该压缩文件，然后把其中的
poi-3.10-FINAL-20140208.jar、
poi-scratchpad-3.10-FINAL-20140208.jar，
以及 lib 目录下的
commons-codec-1.5.jar、commons-logging-1.1.jar、log4j-1.2.13.jar 一起加入到工程里面。