基于词典的翻译

1原理

用词典中的词对来替换目标数据的中文全部换成英文

2效果图

2.1词典

2.2源数据

2.3结果数据

3代码实现

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;

/**
 * @author ZTX
 *标签词典替换:(中文换成英文)
1、得到词典中中文词长,词长按长度大小降序排列得到新词典
2、读取目标数据中文,在新词典中依次查找,替换成英文
 */

public class replace {
	private static String WordPath="./data/标签词典-待核查.xls";
	private static String DB="./data/test5.xls";	
public static void main (String args[]){
	HSSFWorkbook workbook=null;
	Map<String ,String> wordMap=new HashMap<String ,String>();//词典Map,所有词
	Map<Map ,Integer> wordLenMap=new HashMap<Map,Integer>();//排序Map
	Map<String ,String> newDec=new HashMap<String ,String>();//新词典Map
	String eng,chi;
	int chiLen=0;
	
	workbook=getDBwb(WordPath);
	HSSFSheet sheet=workbook.getSheetAt(0);
	HSSFSheet newdec=workbook.createSheet();
	int rowNum=sheet.getLastRowNum();//获得总行数
	// 获得行记录
	int i,j;
	for (i=0;i<=rowNum;i++){//从第一行读数据而第二行则i=1
		HSSFRow row=sheet.getRow(i);
		Map<String ,String> wordDui=new HashMap<String ,String>();//词对Map,一个词对
		try {
			chi=getValue(row.getCell(0)).trim();//中文
			eng=getValue(row.getCell(1)).trim();//英文
		} catch (Exception e) {
			//e.printStackTrace();
			System.out.println("空");
			i++;
			continue;//空行不管
		}
		chiLen=chi.length();
		//System.out.println("E:"+eng+"\tC:"+chi+"\tCLen:"+chiLen+"\tLine:"+i);
		//中文无重,中文做key值
		wordMap.put(chi,eng );//静态的词典
		wordDui.put(chi, eng);//动态的词对
		wordLenMap.put(wordDui,chiLen);//词长+词对
	}
				
	List<Map.Entry<Map,Integer>> mappingList = null; //指定map格式的map节点
	  //通过ArrayList构造函数把map.entrySet()转换成list 
	  mappingList = new ArrayList<Map.Entry<Map,Integer>>(wordLenMap.entrySet()); 
	  //通过比较器实现比较排序 
	  Collections.sort(mappingList, new Comparator<Map.Entry<Map,Integer>>(){ 
	   public int compare(Map.Entry<Map,Integer> mapping1,Map.Entry<Map,Integer> mapping2){ 
	    return mapping1.getValue().compareTo(mapping2.getValue()); 
	   } 
	  }); 
	  //写回XXX暂时不用了 数据量小用newDec
	  int rown=0;
	  HSSFRow newRow;
	  Map mapword;
	  for(Entry<Map,Integer> mapping:mappingList){ 
		  mapword=(Map)mapping.getKey();
		  int n =mapping.getValue();
		String w= mapword.toString();
		System.out.println(n);
		System.out.println("e"+w);
		System.out.println(n);
//遍历mapmap
  Set<Entry<String,String>> s = mapword.entrySet(); 
  Iterator<Entry<String,String>> inner = s.iterator(); //实际只有一条,while一次  
  while(inner.hasNext()) {  
	  Entry<String, String> iworddui=inner.next();
	  try {
		  String list = (String)iworddui.getValue(); //eng
	       System.out.println("word-c"+list);   
	     String list2 = (String)iworddui.getKey(); //chi  
	       System.out.println("word-e"+list2);
	       newDec.put(list2, list);//make new dec
	  } catch (Exception e) {
		  //e.printStackTrace();
		  System.out.println("处理后空值跳过");
		  continue;
	  } 
  }   
	  } 
	  replaceTarget(newDec);
}
public static void replaceTarget(Map<String, String> newDec) {
	//读目标excel并查中文在map中的英文
	  HSSFWorkbook work=getDBwb(DB);
	//修改cell
	  HSSFSheet sheet=work.getSheetAt(0);
		int rowN=sheet.getLastRowNum();//获得总行数
		 int coloumNum=sheet.getRow(3).getPhysicalNumberOfCells();//获得总列数
		// 获得行记录
		int i,j;
		for (i=1;i<=rowN;i++){//行
			HSSFRow row=sheet.getRow(i+1);
			if(row==null)continue;
			//第2列 第5列
				if(row.getCell(1) != null){
					row.getCell(1).removeCellComment();
					row.getCell(1).setCellValue( replaceWord(newDec,row.getCell(1).toString()) );
				}else{
					System.out.println("blank");
				}
				if(row.getCell(4) != null){
					row.getCell(4).removeCellComment();
					row.getCell(4).setCellValue( replaceWord(newDec,row.getCell(4).toString()) );
				}else{
					System.out.println("blank");
				}
		}
		//要把结果写到文件中去
		writeBack2Excel(DB, work);
}
public static String replaceWord(Map<String, String> newDec, String string) {
//TODO
	//截取
	String result="";
	String[] cut=string.split("\\*");
	//替换
	for(int i=0;i<cut.length;i++){
		if(newDec.get(cut[i])!=null){
			cut[i]=newDec.get(cut[i]);
		}
		if(i==cut.length-1)result+=cut[i];
		else
		result+=cut[i]+"*";
	}
	//拼合
	System.out.println("source"+string+"result"+result);
	return result;
}
public static void writeBack2Excel(String DBurl,HSSFWorkbook wb) {
	 FileOutputStream fileOut;
	 System.out.println("OK!...");
	try {
		fileOut = new FileOutputStream(DBurl);		
		wb.write(fileOut);//把Workbook对象输出到文件中  
		fileOut.flush();              
		fileOut.close(); 
	} catch (FileNotFoundException e) {
		e.printStackTrace();
	} catch (IOException e) {
		e.printStackTrace();
	}  
}
public static String getValue(HSSFCell  cell){

	String cellvalue = null; 
	switch (cell.getCellType()){   
	  
	case HSSFCell.CELL_TYPE_NUMERIC:{   
		   
		if (HSSFDateUtil.isCellDateFormatted(cell)){  
			SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd"); 
			
			Date date = cell.getDateCellValue();   
			  
			cellvalue = sdf.format(date);
		}   
		  
		else{   
			
			Integer num = new Integer((int) cell.getNumericCellValue());   
			cellvalue = String.valueOf(num);   
		}   
		break;   
	}   
   
	case HSSFCell.CELL_TYPE_STRING:   
		// 取得当前的Cell字符串   
		cellvalue = cell.getStringCellValue().replaceAll("'", "''");   
		break;   
	}
	return cellvalue;	
}
public static void copyCell(HSSFCell cellCome,HSSFCell cellTo) {   
	String strCell = "";        
	switch (cellCome.getCellType()) {
	case HSSFCell.CELL_TYPE_STRING:  // 字符串         
		strCell = cellCome.getStringCellValue();  
		cellTo.setCellType(HSSFCell.CELL_TYPE_STRING);
		cellTo.setCellValue(cellCome.getStringCellValue());
		break;        
	case HSSFCell.CELL_TYPE_NUMERIC: // 数字
		strCell = String.valueOf(cellCome.getNumericCellValue()); 
		cellTo.setCellType(HSSFCell.CELL_TYPE_NUMERIC);
		cellTo.setCellValue(cellCome.getNumericCellValue());
		break;        
	case HSSFCell.CELL_TYPE_BOOLEAN: // 布尔     
		strCell = String.valueOf(cellCome.getBooleanCellValue()); 
		cellTo.setCellType(HSSFCell.CELL_TYPE_BOOLEAN);
		cellTo.setCellValue(cellCome.getBooleanCellValue());
		break;       
	case HSSFCell.CELL_TYPE_BLANK:   //  空     
		strCell = "";            
		cellTo.setCellType(HSSFCell.CELL_TYPE_BLANK);
		
		break;    
	case HSSFCell.CELL_TYPE_FORMULA:   //  公式日期??    
		strCell = String.valueOf(cellCome.getDateCellValue());  
		cellTo.setCellType(HSSFCell.CELL_TYPE_NUMERIC);
		cellTo.setCellValue(cellCome.getNumericCellValue());
		break; 
	default:     
		strCell = "";    
		cellTo.setCellType(HSSFCell.CELL_TYPE_BLANK);
		break;       
	}            
	//System.out.println(strCell);    
} 
public static HSSFWorkbook getDBwb(String getDB) {
	HSSFWorkbook workbook=null;
	try {
		
		  FileInputStream fis=new FileInputStream(getDB);
	       workbook=new HSSFWorkbook(fis); 

	} catch (IOException e) {
		e.printStackTrace();
	}
	return workbook;
}
}


 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值