/**
 * Converts Chinese characters to tone-marked pinyin, derives a compact tone-based
 * numeric code from a name, and scores how well two such codes match.
 *
 * <p>Tone classes used throughout: {@code 1} = first tone, {@code 2} = second or third
 * tone, {@code 3} = fourth tone.
 */
public class HanZiToPinYin {

    /** Placeholder returned by {@link #toNum(char)} when no tone can be determined. */
    private static final String UNKNOWN_TONE = "s";

    // Tone-mark detectors, compiled once instead of on every call.
    private static final Pattern TONE_1 = Pattern.compile("[ĀāēĒĪīįŌōŪūǕǖü]");
    private static final Pattern TONE_2_OR_3 =
            Pattern.compile("[ÁáǎǍĂăĕéěĔĚǏǐĭÍíńŃŇňŎŏǑǒÓóŬŭǓǔúÚǗǘǙǚ]");
    private static final Pattern TONE_4 = Pattern.compile("[ÀàėÈèìÌǸǹòÒùÙǛǜ]");

    // Digit-pair classifiers used by toType_sum, compiled once.
    private static final Pattern PAIR_WITH_ZERO = Pattern.compile("0[0123]|[0123]0");
    private static final Pattern PAIR_WITH_TWO = Pattern.compile("[13]2|2[13]");
    private static final Pattern PAIR_ONE_THREE = Pattern.compile("13|31");
    private static final Pattern PAIR_SAME_OUTER = Pattern.compile("11|33");
    private static final Pattern PAIR_BOTH_TWO = Pattern.compile("22");

    /**
     * Rewrite rules applied, in this exact order, on every reduction pass of
     * {@link #toNum(String, int)}. Each entry is {regex, replacement}.
     * The leading pair of tone digits is folded into 4, 5 or 6, and that marker is
     * then combined with the following digit back into a digit 1-3.
     */
    private static final String[][] REDUCTION_RULES = {
        {"s(4|12|23|31)", "s4"}, {"41", "3"}, {"42", "1"}, {"43", "2"},
        {"s(5|11|22|33)", "s5"}, {"51", "1"}, {"52", "2"}, {"53", "3"},
        {"s(6|21|13|32)", "s6"}, {"61", "2"}, {"62", "3"}, {"63", "1"},
    };

    /**
     * Converts one Chinese character to its pinyin (lower case, tone marks, unicode "ü").
     *
     * @param hanzi the character to convert
     * @return the first reading reported by the pinyin library, or {@code null} when the
     *         character is outside U+4E00..U+9FA5, no reading is available, or the
     *         library rejects the output format
     */
    public static String toPinYin(char hanzi) {
        if (hanzi < 0x4e00 || hanzi > 0x9fa5) {
            return null;
        }
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        format.setToneType(HanyuPinyinToneType.WITH_TONE_MARK);
        format.setVCharType(HanyuPinyinVCharType.WITH_U_UNICODE);
        try {
            String[] readings = PinyinHelper.toHanyuPinyinStringArray(hanzi, format);
            // Guard against a missing or empty result instead of failing on readings[0].
            if (readings == null || readings.length == 0) {
                return null;
            }
            return readings[0];
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            // Best effort, as before: report the problem and treat the tone as unknown.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Derives the tone class of one Chinese character from its pinyin.
     *
     * <p>The fourth-tone marks are checked first, then second/third, then first.
     *
     * @param hanzi the character to classify
     * @return {@code "1"} (first tone), {@code "2"} (second or third tone), {@code "3"}
     *         (fourth tone), or {@code "s"} when no pinyin or no tone mark is found
     */
    public static String toNum(char hanzi) {
        String pinyin = toPinYin(hanzi);
        if (pinyin == null) {
            return UNKNOWN_TONE;
        }
        if (TONE_4.matcher(pinyin).find()) {
            return "3";
        }
        if (TONE_2_OR_3.matcher(pinyin).find()) {
            return "2";
        }
        if (TONE_1.matcher(pinyin).find()) {
            return "1";
        }
        return UNKNOWN_TONE;
    }

    /**
     * Reduces the tone classes of several characters to a short numeric code.
     *
     * <p>According to the original author's notes: {@code mincount = 1} yields one digit
     * 1-6, {@code mincount = 2} two digits 1-3, {@code mincount = 3} three digits 1-3.
     * Short results are left-padded with zeros or repeated to reach {@code mincount}
     * characters; that padding only has enough material for {@code mincount} up to 3.
     *
     * <p>Characters whose tone cannot be determined are ignored. Previously their
     * placeholder was kept in the working string, which made the reduction loop spin
     * forever (no rule matched) or leaked a non-digit into the result.
     *
     * @param hanziString the characters to encode; {@code null} is treated as empty
     * @param mincount    the requested code length
     * @return the numeric code, or {@code "000"} when no character has a known tone
     */
    public static String toNum(String hanziString, int mincount) {
        StringBuilder tones = new StringBuilder("s");
        if (hanziString != null) {
            for (int i = 0; i < hanziString.length(); i++) {
                String tone = toNum(hanziString.charAt(i));
                if (!UNKNOWN_TONE.equals(tone)) {
                    tones.append(tone);
                }
            }
        }
        String pynum = tones.toString();

        while (pynum.length() > mincount + 1) {
            String before = pynum;
            for (String[] rule : REDUCTION_RULES) {
                pynum = pynum.replaceFirst(rule[0], rule[1]);
            }
            if (pynum.equals(before)) {
                // No rule applied (e.g. mincount < 1): stop rather than loop forever.
                break;
            }
        }

        if (pynum.length() <= 1) {
            return "000";
        }
        String digits = pynum.substring(1);
        if (pynum.length() > mincount) {
            // At most mincount + 1 characters remain here; drop the leading marker.
            return digits;
        }
        String padded;
        if (pynum.length() % 2 == 1) {
            padded = "000" + digits;            // two digits: pad with zeros on the left
        } else {
            padded = digits + digits + digits;  // one digit: repeat it
        }
        return padded.substring(padded.length() - mincount);
    }

    /**
     * Builds the three "type" elements of a name from its one-digit and three-digit codes.
     * Example from the original notes: S(关云长) = s111, s122, s132.
     *
     * @param hanziString the name to encode
     * @return three strings starting with {@code "s"}, or {@code null} when the one-digit
     *         code is outside 1-6 (for instance when no character has a known tone)
     */
    public static String[] toType(String hanziString) {
        int single = Integer.parseInt(toNum(hanziString, 1));
        String triple = toNum(hanziString, 3);
        int tripleValue = Integer.parseInt(triple);
        if (single >= 1 && single <= 3) {
            return new String[] {
                "s" + single + "1" + triple.charAt(0),
                "s" + single + "2" + triple.charAt(1),
                "s" + single + "3" + triple.charAt(2),
            };
        }
        if (single >= 4 && single <= 6) {
            String tail = "" + (single - 3) + (tripleValue % 10);
            return new String[] {"s1" + tail, "s2" + tail, "s3" + tail};
        }
        return null;
    }

    /**
     * "Rightward" comparison of two names: {@code caseString} is expanded into its code
     * elements and {@code hanziString} is scored against each via
     * {@link #toLeftType(String, String)}.
     *
     * @param hanziString the name being scored
     * @param caseString  the name whose code elements serve as reference
     * @return the mean of the three element scores when the one-digit code of
     *         {@code caseString} is 1-3, the single element score when it is 4-6,
     *         otherwise {@code 0}
     */
    public static double toRightType(String hanziString, String caseString) {
        int single = Integer.parseInt(toNum(caseString, 1));
        String triple = toNum(caseString, 3);
        int tripleValue = Integer.parseInt(triple);
        if (single >= 1 && single <= 3) {
            double first = toLeftType(hanziString, single + "1" + triple.charAt(0));
            double second = toLeftType(hanziString, single + "2" + triple.charAt(1));
            double third = toLeftType(hanziString, single + "3" + triple.charAt(2));
            return (first + second + third) / 3;
        }
        if (single >= 4 && single <= 6) {
            return toLeftType(hanziString, "0" + (single - 3) + (tripleValue % 10));
        }
        return 0;
    }

    /**
     * "Leftward" comparison: scores a name against a three-character code by expanding
     * the name into its code elements and scoring each element against
     * {@code caseString}.
     *
     * @param hanziString the name being scored
     * @param caseString  the reference code; only three-character codes affect the score
     * @return the mean of the three element scores when the name's one-digit code is 1-3,
     *         the single element score when it is 4-6, otherwise {@code 1}
     */
    public static double toLeftType(String hanziString, String caseString) {
        int single = Integer.parseInt(toNum(hanziString, 1));
        String triple = toNum(hanziString, 3);
        int tripleValue = Integer.parseInt(triple);
        if (single >= 1 && single <= 3) {
            double first = toType_sum(single + "1" + triple.charAt(0), caseString);
            double second = toType_sum(single + "2" + triple.charAt(1), caseString);
            double third = toType_sum(single + "3" + triple.charAt(2), caseString);
            return (first + second + third) / 3;
        }
        if (single >= 4 && single <= 6) {
            return toType_sum("0" + (single - 3) + (tripleValue % 10), caseString);
        }
        return 1;
    }

    /**
     * Scores one code against another, position by position, multiplying a weight for
     * every pair of characters. Returns {@code 1} unless both codes are exactly three
     * characters long; pairs matching none of the patterns leave the score unchanged.
     *
     * <p>Constants from the original notes:
     * bs = (sqrt(5) - 1) / 2 = 0.618033989 and
     * be = (1/bs - 1)/2 - sqrt(((1/bs - 1)/2)^2 - 1) = 0.250932498.
     *
     * @param pytype     the first code
     * @param caseString the second code
     * @return the product of the per-position weights
     */
    private static double toType_sum(String pytype, String caseString) {
        double score = 1;
        if (pytype.length() != 3 || caseString.length() != 3) {
            return score;
        }
        for (int i = 0; i < 3; i++) {
            String pair = pytype.substring(i, i + 1) + caseString.substring(i, i + 1);
            if (PAIR_WITH_ZERO.matcher(pair).find()) {
                score = score / 3;
            } else if (PAIR_WITH_TWO.matcher(pair).find()) {
                score = score * 0.190983006; // (1 - bs) / 2
            } else if (PAIR_ONE_THREE.matcher(pair).find()) {
                score = score * 0.047923843; // 0.19 * be
            } else if (PAIR_SAME_OUTER.matcher(pair).find()) {
                score = score * 0.761093152; // 0.19 / be
            } else if (PAIR_BOTH_TWO.matcher(pair).find()) {
                // NOTE(review): the original comment says "bs * bs", but the value is bs
                // itself (bs * bs would be about 0.381966). Value kept as written; confirm.
                score = score * 0.618033989;
            }
        }
        return score;
    }
}
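// Source article: 姓名预测,个性匹配与运势——算法1:姓名编码(JAVA代码)
// (Name prediction, personality matching and fortune — Algorithm 1: name encoding, Java code)
// Blog page footer: 最新推荐文章于 2021-02-24 06:34:38 发布 (latest recommended article published 2021-02-24 06:34:38)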