Android中汉字转换为拼音
从14年10月开始学习Android,到现在差不多已经快两年了。自己也是一个比较懒的人,很多时候都是拿来主义,为了督促自己反思总结所遇到的问题,特开博客进行整理。
Android中汉字转换为拼音,常用的有两种方法,两种都存在一些bug,不过稍作修改,都是可以用的。
1.Android手机自身的通讯录本身有汉字转换为拼音的实现;我们可以偷个懒,直接把系统自带的HanziToPinyin.java类拿来用。
2.另一种是直接使用Java开源的Pinyin4J.jar
先说第一种,直接上代码:
// Name to convert; its first character "单" is read differently as a surname.
String word="单雄心";
// Pass the input to get(): it tokenizes the string it is given.
ArrayList<HanziToPinyin.Token> tokens = HanziToPinyin.getInstance().get(word);
StringBuilder sb = new StringBuilder();
if (tokens != null && tokens.size() > 0) {
    for (HanziToPinyin.Token token : tokens) {
        if (HanziToPinyin.Token.PINYIN == token.type) {
            // Token typed as PINYIN: append its converted form.
            sb.append(token.target);
        } else {
            // Any other token type: keep the original text unchanged.
            sb.append(token.source);
        }
    }
}
return sb.toString();
返回的结果是:DANXIONGXIN
这里我们看到“单”是多音字,这里只显示了dan的读音,没有把它作为姓氏处理,所以需要我们自己先进行过滤一下。我下面列出姓氏中的多音字:
@”种” : @”chong”,
@”单” : @”shan”,
@”解” : @”xie” ,
@”查” : @”zha” ,
@”曾” : @”zeng”,
@”盖” : @”ge”,
@”缪” : @”miao”,
@”朴” : @”piao”,
@”繁” : @”po”,
@”仇” : @”qiu”,
@”么” : @”yao”
处理一下,就可以了。
另一种是借助Java的开源项目Pinyin4J.jar,代码如下:
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import android.text.TextUtils;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
/**
 * Converts text to lower-case, tone-less Hanyu Pinyin using pinyin4j.
 *
 * <p>Characters in the range U+4E00..U+9FA5 are converted, ASCII letters are kept as they are,
 * and every other character is dropped (converted to an empty string). When the first character
 * of the input is one of the known polyphonic surname characters, its surname reading is used.
 */
public class PinyinUtil {

    /** First character of the range this class treats as Chinese. */
    private static final char HANZI_FIRST = '\u4E00';

    /** Last character of the range this class treats as Chinese. */
    private static final char HANZI_LAST = '\u9FA5';

    /** Placeholder reading for characters that are dropped or cannot be converted. */
    private static final String NO_READING = "";

    // Disabled by the original author and kept for reference: builds a map from every
    // full-pinyin combination of the input (all polyphonic readings) to its abbreviated form.
    /*public Map<String,String> getPinyin(String content){
        String[][] temp = getPinyinss(content);
        String[] pingyin = Exchange(temp);
        if(temp.length==1){
            Map<String,String> map=new HashMap<String,String>();
            for(int i=0;i<pingyin.length;i++){
                map.put(pingyin[i],pingyin[i].substring(0,1));
            }
            return map;
        }else{
            for (int i=0;i<temp[0].length;i++){
                if(!TextUtils.isEmpty(temp[0][i])){
                    temp[0][i]=temp[0][i].substring(0,1);
                }
            }
            String[] pingyinJane=ExchangeJane(temp);
            return getPinyin(pingyin,pingyinJane);
        }
    }*/

    /**
     * Returns, for every character of the input, all of its candidate readings
     * (polyphonic characters yield several entries).
     *
     * @param content text to convert; {@code null} is treated as empty
     * @return one row per input character; rows are never {@code null} and never empty
     */
    private String[][] getPinyinss(String content) {
        if (content == null) {
            return new String[0][];
        }
        HanyuPinyinOutputFormat format = newOutputFormat();
        String[][] readings = new String[content.length()][];
        for (int i = 0; i < readings.length; i++) {
            readings[i] = readingsOf(content.charAt(i), i == 0, format);
        }
        return readings;
    }

    /**
     * Returns one reading per input character; for polyphonic characters the first
     * reading reported by pinyin4j is used.
     *
     * @param content text to convert; {@code null} is treated as empty
     * @return an array with one entry per input character; dropped or unconvertible
     *         characters are represented by an empty string, never by {@code null}
     */
    public String[] getPinyins(String content) {
        if (content == null) {
            return new String[0];
        }
        HanyuPinyinOutputFormat format = newOutputFormat();
        String[] readings = new String[content.length()];
        for (int i = 0; i < readings.length; i++) {
            readings[i] = readingsOf(content.charAt(i), i == 0, format)[0];
        }
        return readings;
    }

    /**
     * Returns the full pinyin of the input: the readings of all characters concatenated.
     *
     * @param content text to convert; {@code null} is treated as empty
     * @return the concatenated readings, possibly empty
     */
    public String getPinyin(String content){
        StringBuilder full = new StringBuilder();
        for (String reading : getPinyins(content)) {
            full.append(reading);
        }
        return full.toString();
    }

    /**
     * Returns the abbreviated pinyin of the input: the first letter of each character's reading.
     *
     * @param content text to convert; {@code null} is treated as empty
     * @return the concatenated initials, possibly empty
     */
    public String getPinyinJane(String content){
        StringBuilder initials = new StringBuilder();
        for (String reading : getPinyins(content)) {
            if (!TextUtils.isEmpty(reading)) {
                // Only the initial belongs in the abbreviation, not the whole syllable.
                initials.append(reading.charAt(0));
            }
        }
        return initials.toString();
    }

    /**
     * Returns the surname reading of a polyphonic surname character.
     *
     * @param word the character to look up
     * @return the surname reading, or {@code null} when the character has no special reading
     */
    private static String getSpeWord(char word){
        // A switch keeps each character next to its reading, so the two cannot drift apart
        // the way two parallel arrays can.
        switch (word) {
            case '种': return "chong";
            case '单': return "shan";
            case '解': return "xie";
            case '查': return "zha";
            case '曾': return "zeng";
            case '盖': return "ge";
            case '缪': return "miao";
            case '朴': return "piao";
            case '繁': return "po";
            case '仇': return "qiu";
            case '么': return "yao";
            default: return null;
        }
    }

    /**
     * Pairs each full pinyin string with the abbreviated string at the same index.
     *
     * @param pingyin full pinyin strings, used as keys
     * @param pingyinJane abbreviated strings, used as values
     * @return a map from full pinyin to abbreviation; only indexes present in both arrays are used
     */
    private Map<String,String> getPinyin(String[] pingyin,String[] pingyinJane){
        Map<String,String> map=new HashMap<String,String>();
        int count = Math.min(pingyin.length, pingyinJane.length);
        for (int i = 0; i < count; i++) {
            map.put(pingyin[i], pingyinJane[i]);
        }
        return map;
    }

    /**
     * Builds every full-pinyin combination of the given per-character readings.
     *
     * @param strJaggedArray one row of readings per character
     * @return all combinations, or an empty array when there are no rows
     */
    private String[] Exchange(String[][] strJaggedArray) {
        String[][] combined = DoExchange(strJaggedArray);
        return combined.length == 0 ? new String[0] : combined[0];
    }

    /**
     * Builds every abbreviated combination of the given per-character readings.
     * Entries of the first row are used as they are; entries of later rows contribute
     * only their first letter.
     *
     * @param strJaggedArray one row of readings per character
     * @return all combinations, or an empty array when there are no rows
     */
    private String[] ExchangeJane(String[][] strJaggedArray) {
        String[][] combined = DoExchangeJane(strJaggedArray);
        return combined.length == 0 ? new String[0] : combined[0];
    }

    /**
     * Collapses the rows into a single row holding their cartesian product, each
     * combination being the concatenation of one entry per row.
     *
     * @param strJaggedArray rows to combine
     * @return the input itself when it has fewer than two rows, otherwise a one-row array
     */
    private String[][] DoExchange(String[][]strJaggedArray) {
        return combine(strJaggedArray, false);
    }

    /**
     * Same as {@link #DoExchange(String[][])}, except that entries of the second and later
     * rows contribute only their first letter (empty entries contribute nothing).
     *
     * @param strJaggedArray rows to combine
     * @return the input itself when it has fewer than two rows, otherwise a one-row array
     */
    private String[][] DoExchangeJane(String[][]strJaggedArray) {
        return combine(strJaggedArray, true);
    }

    /**
     * Joins the strings of a set with commas and lower-cases the result.
     *
     * @param stringSet strings to join, in the set's iteration order
     * @return the comma-separated, lower-cased string
     */
    private String makeStringByStringSet(Set<String> stringSet) {
        StringBuilder joined = new StringBuilder();
        for (String s : stringSet) {
            if (joined.length() > 0) {
                joined.append(',');
            }
            joined.append(s);
        }
        // Locale.ROOT keeps the casing independent of the device language.
        return joined.toString().toLowerCase(Locale.ROOT);
    }

    /**
     * Creates the output format shared by all conversions: lower case, no tone, and ü
     * rendered as the Unicode character ü.
     */
    private static HanyuPinyinOutputFormat newOutputFormat() {
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        // UPPERCASE yields upper-case pinyin, LOWERCASE lower-case pinyin.
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        // WITHOUT_TONE omits tones, WITH_TONE_NUMBER appends a digit 1-4,
        // WITH_TONE_MARK puts the tone mark above the vowel.
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        // WITH_V renders ü as "v", WITH_U_AND_COLON as "u:", WITH_U_UNICODE as "ü".
        // With tone marks only WITH_U_UNICODE is allowed; other combinations fail with
        // BadHanyuPinyinOutputFormatCombination ("tone marks cannot be added to v or u:").
        format.setVCharType(HanyuPinyinVCharType.WITH_U_UNICODE);
        return format;
    }

    /**
     * Returns the candidate readings of a single character.
     *
     * @param c the character to convert
     * @param surnamePosition whether the character is the first one of the input, where a
     *        surname reading takes precedence
     * @param format the pinyin4j output format
     * @return a non-empty array: the readings of a Chinese character, the letter itself for
     *         A-Z / a-z, or a single empty string for anything else
     */
    private static String[] readingsOf(char c, boolean surnamePosition,
            HanyuPinyinOutputFormat format) {
        if (c >= HANZI_FIRST && c <= HANZI_LAST) {
            if (surnamePosition) {
                String surname = getSpeWord(c);
                if (surname != null) {
                    return new String[] { surname };
                }
            }
            try {
                String[] readings = PinyinHelper.toHanyuPinyinStringArray(c, format);
                // pinyin4j may have no reading for the character; fall through in that case.
                if (readings != null && readings.length > 0) {
                    return readings;
                }
            } catch (BadHanyuPinyinOutputFormatCombination e) {
                // Best effort: report the problem and treat the character as unconvertible.
                e.printStackTrace();
            }
            return new String[] { NO_READING };
        }
        boolean asciiLetter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
        return new String[] { asciiLetter ? String.valueOf(c) : NO_READING };
    }

    /**
     * Shared implementation of {@link #DoExchange(String[][])} and
     * {@link #DoExchangeJane(String[][])}.
     *
     * @param rows rows to combine
     * @param initialsOnly whether entries of the second and later rows are reduced to their
     *        first letter
     * @return the input itself when it has fewer than two rows, otherwise a one-row array
     *         holding every combination
     */
    private static String[][] combine(String[][] rows, boolean initialsOnly) {
        if (rows.length < 2) {
            return rows;
        }
        String[] prefixes = rows[0];
        for (int row = 1; row < rows.length; row++) {
            String[] parts = rows[row];
            String[] merged = new String[prefixes.length * parts.length];
            int next = 0;
            for (String prefix : prefixes) {
                for (String part : parts) {
                    if (!initialsOnly) {
                        merged[next++] = prefix + part;
                    } else if (TextUtils.isEmpty(part)) {
                        merged[next++] = prefix;
                    } else {
                        merged[next++] = prefix + part.substring(0, 1);
                    }
                }
            }
            prefixes = merged;
        }
        return new String[][] { prefixes };
    }
}
使用Pinyin4J.jar也存在bug,比如“和”字是多音字,输出的结果却是he、huo、hai、hu,而实际上只有he、huo、hu。
找不到HanziToPinyin类和Pinyin4J.jar的同学可以这里下载
http://download.csdn.net/detail/u013224617/9537058