1原理
用词典中的词对来替换目标数据的中文全部换成英文
2效果图
2.1词典
2.2源数据
2.3结果数据
3代码实现
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
/**
* @author ZTX
*标签词典替换:(中文换成英文)
1、得到词典中中文词长,词长按长度大小降序排列得到新词典
2、读取目标数据中文,在新词典中依次查找,替换成英文
*/
public class replace {
private static String WordPath="./data/标签词典-待核查.xls";
private static String DB="./data/test5.xls";
public static void main (String args[]){
HSSFWorkbook workbook=null;
Map<String ,String> wordMap=new HashMap<String ,String>();//词典Map,所有词
Map<Map ,Integer> wordLenMap=new HashMap<Map,Integer>();//排序Map
Map<String ,String> newDec=new HashMap<String ,String>();//新词典Map
String eng,chi;
int chiLen=0;
workbook=getDBwb(WordPath);
HSSFSheet sheet=workbook.getSheetAt(0);
HSSFSheet newdec=workbook.createSheet();
int rowNum=sheet.getLastRowNum();//获得总行数
// 获得行记录
int i,j;
for (i=0;i<=rowNum;i++){//从第一行读数据而第二行则i=1
HSSFRow row=sheet.getRow(i);
Map<String ,String> wordDui=new HashMap<String ,String>();//词对Map,一个词对
try {
chi=getValue(row.getCell(0)).trim();//中文
eng=getValue(row.getCell(1)).trim();//英文
} catch (Exception e) {
//e.printStackTrace();
System.out.println("空");
i++;
continue;//空行不管
}
chiLen=chi.length();
//System.out.println("E:"+eng+"\tC:"+chi+"\tCLen:"+chiLen+"\tLine:"+i);
//中文无重,中文做key值
wordMap.put(chi,eng );//静态的词典
wordDui.put(chi, eng);//动态的词对
wordLenMap.put(wordDui,chiLen);//词长+词对
}
List<Map.Entry<Map,Integer>> mappingList = null; //指定map格式的map节点
//通过ArrayList构造函数把map.entrySet()转换成list
mappingList = new ArrayList<Map.Entry<Map,Integer>>(wordLenMap.entrySet());
//通过比较器实现比较排序
Collections.sort(mappingList, new Comparator<Map.Entry<Map,Integer>>(){
public int compare(Map.Entry<Map,Integer> mapping1,Map.Entry<Map,Integer> mapping2){
return mapping1.getValue().compareTo(mapping2.getValue());
}
});
//写回XXX暂时不用了 数据量小用newDec
int rown=0;
HSSFRow newRow;
Map mapword;
for(Entry<Map,Integer> mapping:mappingList){
mapword=(Map)mapping.getKey();
int n =mapping.getValue();
String w= mapword.toString();
System.out.println(n);
System.out.println("e"+w);
System.out.println(n);
//遍历mapmap
Set<Entry<String,String>> s = mapword.entrySet();
Iterator<Entry<String,String>> inner = s.iterator(); //实际只有一条,while一次
while(inner.hasNext()) {
Entry<String, String> iworddui=inner.next();
try {
String list = (String)iworddui.getValue(); //eng
System.out.println("word-c"+list);
String list2 = (String)iworddui.getKey(); //chi
System.out.println("word-e"+list2);
newDec.put(list2, list);//make new dec
} catch (Exception e) {
//e.printStackTrace();
System.out.println("处理后空值跳过");
continue;
}
}
}
replaceTarget(newDec);
}
public static void replaceTarget(Map<String, String> newDec) {
//读目标excel并查中文在map中的英文
HSSFWorkbook work=getDBwb(DB);
//修改cell
HSSFSheet sheet=work.getSheetAt(0);
int rowN=sheet.getLastRowNum();//获得总行数
int coloumNum=sheet.getRow(3).getPhysicalNumberOfCells();//获得总列数
// 获得行记录
int i,j;
for (i=1;i<=rowN;i++){//行
HSSFRow row=sheet.getRow(i+1);
if(row==null)continue;
//第2列 第5列
if(row.getCell(1) != null){
row.getCell(1).removeCellComment();
row.getCell(1).setCellValue( replaceWord(newDec,row.getCell(1).toString()) );
}else{
System.out.println("blank");
}
if(row.getCell(4) != null){
row.getCell(4).removeCellComment();
row.getCell(4).setCellValue( replaceWord(newDec,row.getCell(4).toString()) );
}else{
System.out.println("blank");
}
}
//要把结果写到文件中去
writeBack2Excel(DB, work);
}
public static String replaceWord(Map<String, String> newDec, String string) {
//TODO
//截取
String result="";
String[] cut=string.split("\\*");
//替换
for(int i=0;i<cut.length;i++){
if(newDec.get(cut[i])!=null){
cut[i]=newDec.get(cut[i]);
}
if(i==cut.length-1)result+=cut[i];
else
result+=cut[i]+"*";
}
//拼合
System.out.println("source"+string+"result"+result);
return result;
}
public static void writeBack2Excel(String DBurl,HSSFWorkbook wb) {
FileOutputStream fileOut;
System.out.println("OK!...");
try {
fileOut = new FileOutputStream(DBurl);
wb.write(fileOut);//把Workbook对象输出到文件中
fileOut.flush();
fileOut.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
public static String getValue(HSSFCell cell){
String cellvalue = null;
switch (cell.getCellType()){
case HSSFCell.CELL_TYPE_NUMERIC:{
if (HSSFDateUtil.isCellDateFormatted(cell)){
SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd");
Date date = cell.getDateCellValue();
cellvalue = sdf.format(date);
}
else{
Integer num = new Integer((int) cell.getNumericCellValue());
cellvalue = String.valueOf(num);
}
break;
}
case HSSFCell.CELL_TYPE_STRING:
// 取得当前的Cell字符串
cellvalue = cell.getStringCellValue().replaceAll("'", "''");
break;
}
return cellvalue;
}
public static void copyCell(HSSFCell cellCome,HSSFCell cellTo) {
String strCell = "";
switch (cellCome.getCellType()) {
case HSSFCell.CELL_TYPE_STRING: // 字符串
strCell = cellCome.getStringCellValue();
cellTo.setCellType(HSSFCell.CELL_TYPE_STRING);
cellTo.setCellValue(cellCome.getStringCellValue());
break;
case HSSFCell.CELL_TYPE_NUMERIC: // 数字
strCell = String.valueOf(cellCome.getNumericCellValue());
cellTo.setCellType(HSSFCell.CELL_TYPE_NUMERIC);
cellTo.setCellValue(cellCome.getNumericCellValue());
break;
case HSSFCell.CELL_TYPE_BOOLEAN: // 布尔
strCell = String.valueOf(cellCome.getBooleanCellValue());
cellTo.setCellType(HSSFCell.CELL_TYPE_BOOLEAN);
cellTo.setCellValue(cellCome.getBooleanCellValue());
break;
case HSSFCell.CELL_TYPE_BLANK: // 空
strCell = "";
cellTo.setCellType(HSSFCell.CELL_TYPE_BLANK);
break;
case HSSFCell.CELL_TYPE_FORMULA: // 公式日期??
strCell = String.valueOf(cellCome.getDateCellValue());
cellTo.setCellType(HSSFCell.CELL_TYPE_NUMERIC);
cellTo.setCellValue(cellCome.getNumericCellValue());
break;
default:
strCell = "";
cellTo.setCellType(HSSFCell.CELL_TYPE_BLANK);
break;
}
//System.out.println(strCell);
}
public static HSSFWorkbook getDBwb(String getDB) {
HSSFWorkbook workbook=null;
try {
FileInputStream fis=new FileInputStream(getDB);
workbook=new HSSFWorkbook(fis);
} catch (IOException e) {
e.printStackTrace();
}
return workbook;
}
}