国际化批量转码程序
读取配置文件的工具类(程序打包后正式运行时,通过它读取配置):
package com.ywx.test.unicode;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Properties;
/**
 * Loads {@code config.properties} from the current working directory once, when the
 * class is initialized, and exposes the loaded values through static lookups.
 *
 * <p>If the file is missing or unreadable the stack trace is printed and every lookup
 * behaves as if the file were empty.
 */
public class PropertyUtil {
    /** Properties read from the configuration file; stays empty when loading fails. */
    private static final Properties p = new Properties();

    static {
        String filePath = System.getProperty("user.dir") + File.separator + "config.properties";
        System.out.println("文件路径为:" + filePath);
        FileInputStream in = null;
        try {
            in = new FileInputStream(filePath);
            p.load(in);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close in finally so the stream is released even when load() fails midway.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Looks up a configuration value.
     *
     * @param key the property name
     * @return the configured value, or {@code null} when the key is not present
     */
    public static String getProperty(String key) {
        return p.getProperty(key);
    }

    /**
     * Looks up a configuration value with a fallback.
     *
     * @param key          the property name
     * @param defaultValue the value to return when the key is not present
     * @return the configured value, or {@code defaultValue} when the key is not present
     */
    public static String getProperty(String key, String defaultValue) {
        return p.getProperty(key, defaultValue);
    }
}
unicode转码工具类:
package com.ywx.test.unicode;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.junit.Test;
/**
 * Character classification helpers used by the resource-file transcoder: locating
 * ASCII letters and detecting Chinese characters and punctuation.
 */
public class UnicodeUtil {
    /** Matches a single ASCII letter; compiled once and shared. */
    private static final Pattern LETTER = Pattern.compile("[a-zA-Z]");

    /**
     * Collects the positions of the ASCII letters in a string. Callers use the
     * result to leave those characters untouched while escaping everything else.
     *
     * @param str the string to scan; must not be {@code null}
     * @return the zero-based indices of every character in {@code a-z} or {@code A-Z},
     *         in ascending order; empty when there are none
     */
    public static List<Integer> convert(String str) {
        List<Integer> letterIndexes = new ArrayList<Integer>();
        for (int i = 0; i < str.length(); i++) {
            if (isAsciiLetter(str.charAt(i))) {
                letterIndexes.add(i);
            }
        }
        return letterIndexes;
    }

    /**
     * Tells whether a string contains at least one Chinese character or symbol, as
     * classified by {@link #isChinese(char)}.
     *
     * @param strName the string to inspect; must not be {@code null}
     * @return {@code true} if any character is classified as Chinese
     */
    public static boolean isChinese(String strName) {
        for (char c : strName.toCharArray()) {
            if (isChinese(c)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether a string contains at least one ASCII letter.
     *
     * <p>Uses {@code find()} rather than a whole-string match wrapped in {@code .*},
     * because {@code .} does not match line terminators and such a pattern therefore
     * misses letters in strings that contain one.
     *
     * @param strName the string to inspect; must not be {@code null}
     * @return {@code true} if any character is in {@code a-z} or {@code A-Z}
     */
    public static boolean isLetter(String strName) {
        Matcher m = LETTER.matcher(strName);
        return m.find();
    }

    /**
     * Classifies a single UTF-16 code unit by its Unicode block. CJK ideograph blocks,
     * CJK symbols and punctuation, half/full-width forms and general punctuation all
     * count as Chinese.
     *
     * <p>NOTE(review): the Extension B comparison is kept from the original, but that
     * block lies outside the Basic Multilingual Plane, so a single {@code char} is not
     * expected to fall into it.
     */
    private static boolean isChinese(char c) {
        Character.UnicodeBlock ub = Character.UnicodeBlock.of(c);
        return ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
                || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
                || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION;
    }

    /** Returns {@code true} for characters in {@code a-z} or {@code A-Z}. */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /**
     * Ad-hoc check that scans a mixed Chinese/English sample and prints the list of
     * letter positions found so far after each character.
     */
    @Test
    public void test1() {
        List<Integer> letterIndexes = new ArrayList<Integer>();
        String str = " 你好,空指针,Welcome to 游戏大厅! ";
        for (int i = 0; i < str.length(); i++) {
            if (isAsciiLetter(str.charAt(i))) {
                letterIndexes.add(i);
            }
            System.out.println(letterIndexes);
        }
    }
}
转码程序:
package com.ywx.test.unicode;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.List;
/**
 * Converts the values of a {@code key=value} resource file to Unicode escapes
 * (a backslash, the letter u and four hex digits) and back again.
 *
 * <p>ASCII letters are never escaped; every other character is.
 */
public class UnicodeConvert {
    /** Separator between key and value on a resource line. */
    private static final String SEPARATOR = "=";
    /** Charset used both to read the source file and to write the target file. */
    private static final String CHARACTER = "gbk";
    /** Line terminator written after every converted entry. */
    private static final String NEW_LINE = "\r\n";
    /** Number of hex digits in a well-formed Unicode escape. */
    private static final int ESCAPE_DIGITS = 4;

    /**
     * Escapes a string. ASCII letters are copied through unchanged; every other
     * UTF-16 code unit becomes a backslash-u escape padded to four hex digits, which
     * is the form that {@code java.util.Properties} can read back.
     *
     * @param string the text to escape; must not be {@code null}
     * @return the escaped text
     */
    public static String stringToUnicode(String string) {
        StringBuilder unicode = new StringBuilder(string.length() * 6);
        for (int i = 0; i < string.length(); i++) {
            char c = string.charAt(i);
            if (isAsciiLetter(c)) {
                unicode.append(c);
                continue;
            }
            String hex = Integer.toHexString(c);
            unicode.append("\\u");
            // Left-pad with zeros: an escape shorter than four digits is malformed.
            for (int pad = hex.length(); pad < ESCAPE_DIGITS; pad++) {
                unicode.append('0');
            }
            unicode.append(hex);
        }
        return unicode.toString();
    }

    /**
     * Reverses {@link #stringToUnicode(String)}. Text that is not part of an escape
     * is copied through unchanged.
     *
     * <p>An escape is a backslash, the letter u and up to four hex digits. Accepting
     * fewer than four digits keeps files written by the earlier, unpadded encoder
     * readable. Such short escapes are ambiguous when the following literal text
     * starts with a hex digit; the longest run of at most four digits is taken.
     *
     * @param unicode the escaped text; must not be {@code null}
     * @return the decoded text
     */
    public static String unicodeToString(String unicode) {
        int length = unicode.length();
        StringBuilder string = new StringBuilder(length);
        int i = 0;
        while (i < length) {
            char c = unicode.charAt(i);
            if (c == '\\' && i + 1 < length && unicode.charAt(i + 1) == 'u') {
                int start = i + 2;
                int end = start;
                while (end < length && end - start < ESCAPE_DIGITS && isHexDigit(unicode.charAt(end))) {
                    end++;
                }
                if (end > start) {
                    string.append((char) Integer.parseInt(unicode.substring(start, end), 16));
                    i = end;
                    continue;
                }
                // No hex digits after the marker: fall through and keep it literally.
            }
            string.append(c);
            i++;
        }
        return string.toString();
    }

    /**
     * Converts every {@code key=value} line of a source file and appends the result
     * to a target file. Lines without a separator are reported and skipped. Keys are
     * written unchanged; only values are converted. I/O failures are printed, not thrown.
     *
     * @param srcPath
     *            path of the source file
     * @param targetPath
     *            path of the target file; converted lines are appended to it
     * @param convertType
     *            "1": text to Unicode escapes; "2": Unicode escapes to text; any other
     *            non-empty value copies the lines unchanged
     */
    public static void batchUnicodeConvert(String srcPath, String targetPath, String convertType) {
        if (convertType == null || "".equals(convertType)) {
            System.out.println("转换类型不能为空!");
            return;
        }
        File srcFile = new File(srcPath);
        File targetFile = new File(targetPath);
        if (!srcFile.isFile()) {
            // Creating a directory at the source path would not make it readable.
            System.out.println("源文件不存在或不是文件:" + srcPath);
            return;
        }
        BufferedReader br = null;
        BufferedWriter bw = null;
        try {
            br = openReader(srcFile);
            String line;
            int index = 1;
            while ((line = br.readLine()) != null) {
                int separatorAt = line.indexOf(SEPARATOR);
                if (separatorAt >= 0) {
                    String key = line.substring(0, separatorAt);
                    String value = line.substring(separatorAt + 1);
                    if ("1".equals(convertType)) {
                        value = stringToUnicode(value);
                    } else if ("2".equals(convertType)) {
                        value = unicodeToString(value);
                    }
                    String lastContent = key + SEPARATOR + value;
                    System.out.println("成功编码第[" + index + "]行:" + lastContent);
                    // Open the target once, on the first line that has output, so no
                    // writer is leaked and no file is created when nothing is written.
                    if (bw == null) {
                        bw = openWriter(targetFile);
                    }
                    bw.write(lastContent);
                    bw.write(NEW_LINE);
                } else {
                    System.out.println("资源文件在第[" + index + "]行没有[key]=[value]的对应关系!");
                }
                index++;
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close each resource independently so one failure cannot skip the other.
            closeAndReport(br);
            closeAndReport(bw);
        }
    }

    /**
     * Runs a conversion with fixed sample paths.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // String srcPath = PropertyUtil.getProperty("srcFile");
        // String targetPath = PropertyUtil.getProperty("targetFile");
        // String convertType = PropertyUtil.getProperty("convertType");
        String srcPath = "e:\\123.txt";
        String targetPath = "e:\\456.txt";
        String convertType = "1";
        batchUnicodeConvert(srcPath, targetPath, convertType);
    }

    /** Opens a buffered reader on the file, releasing the raw stream if wrapping fails. */
    private static BufferedReader openReader(File file) throws IOException {
        FileInputStream in = new FileInputStream(file);
        try {
            return new BufferedReader(new InputStreamReader(in, CHARACTER));
        } catch (IOException e) {
            closeAndReport(in);
            throw e;
        }
    }

    /** Opens a buffered appending writer on the file, releasing the raw stream if wrapping fails. */
    private static BufferedWriter openWriter(File file) throws IOException {
        FileOutputStream out = new FileOutputStream(file, true);
        try {
            return new BufferedWriter(new OutputStreamWriter(out, CHARACTER));
        } catch (IOException e) {
            closeAndReport(out);
            throw e;
        }
    }

    /** Closes a resource if it is open, printing rather than propagating any failure. */
    private static void closeAndReport(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Returns {@code true} for characters in {@code a-z} or {@code A-Z}. */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /** Returns {@code true} for characters in {@code 0-9}, {@code a-f} or {@code A-F}. */
    private static boolean isHexDigit(char c) {
        return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
    }
}
测试:
在e盘准备好需要转码的资源文件,我用于测试的如下:
运行程序转码结果:
成功编码第[1]行:login.error.info=\u767b\u5f55\u5931\u8d25\uff01
成功编码第[2]行:task.upload.success=\u4efb\u52a1\u4e0a\u4f20\u6210\u529f\uff01
成功编码第[3]行:aaa=\u4e16\u754c\uff0c\u4f60\u597d\u3002
资源文件在第[4]行没有[key]=[value]的对应关系!
成功编码第[5]行:ddd=hello\u20world\uff0c\u65f6\u5c1a\u5b9e\u73b0\u3002
查看转码后的资源文件:
备注:英文字母不需要转码,程序会自动跳过;其他语言的文字同样可以用本程序转码。