拼音工具类(多音字处理)

因项目需求,需要将中文名称转拼音存储,方便查询,考虑到多音字处理,要求最终结果如下:
举例说明,输入字符串为:解和景都是多音字(小)
输出结果(各多音字读音的全部组合;结果保存在 HashSet 中,顺序不固定):
[xiehejingdushiduoyinzi(xiao), xiehuojingdoushiduoyinzi(xiao), jiehejingdushiduoyinzi(xiao), xieheyingdoushiduoyinzi(xiao), xiehujingdushiduoyinzi(xiao), jiehuoyingdoushiduoyinzi(xiao), jiehujingdoushiduoyinzi(xiao), jiehejingdoushiduoyinzi(xiao), xiehujingdoushiduoyinzi(xiao), jiehuojingdoushiduoyinzi(xiao), xiehaiyingdushiduoyinzi(xiao), xiehejingdoushiduoyinzi(xiao), xiehuyingdushiduoyinzi(xiao), xiehuoyingdoushiduoyinzi(xiao), jiehuoyingdushiduoyinzi(xiao), jiehuyingdushiduoyinzi(xiao), jieheyingdoushiduoyinzi(xiao), jiehuyingdoushiduoyinzi(xiao), jiehuojingdushiduoyinzi(xiao), xiehaiyingdoushiduoyinzi(xiao), jiehaijingdoushiduoyinzi(xiao), jiehaiyingdoushiduoyinzi(xiao), xiehaijingdushiduoyinzi(xiao), xiehuojingdushiduoyinzi(xiao), jieheyingdushiduoyinzi(xiao), jiehaijingdushiduoyinzi(xiao), xieheyingdushiduoyinzi(xiao), jiehujingdushiduoyinzi(xiao), xiehaijingdoushiduoyinzi(xiao), xiehuyingdoushiduoyinzi(xiao), jiehaiyingdushiduoyinzi(xiao), xiehuoyingdushiduoyinzi(xiao)]

废话不多说,上代码。

添加依赖

		<!-- https://mvnrepository.com/artifact/com.belerweb/pinyin4j -->
		<dependency>
			<groupId>com.belerweb</groupId>
			<artifactId>pinyin4j</artifactId>
			<version>2.5.0</version>
		</dependency>

附上工具类代码

import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
import org.apache.log4j.Logger;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Utility methods that convert Chinese text to lowercase, tone-less Hanyu Pinyin
 * via pinyin4j, optionally expanding heteronyms (characters with several readings).
 *
 * <p>Any character that has no pinyin reading (ASCII, punctuation, or any other
 * non-Chinese character) is copied to the output unchanged instead of aborting
 * the conversion.
 *
 * @author wxm
 */
public class PinYinUtils {

    public static final Logger logger = Logger.getLogger(PinYinUtils.class);

    /** Characters whose code unit is at or below this value are never sent to pinyin4j. */
    private static final int PASS_THROUGH_LIMIT = 128;

    /** Punctuation that is copied through verbatim; compiled once rather than on every call. */
    private static final Pattern PUNCTUATION = Pattern.compile(
            "[`~!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。,、?]");

    /**
     * Returns the first letter of the pinyin of every character in the input.
     * For heteronyms the first reading reported by pinyin4j is used.
     * Characters without a pinyin reading are kept as they are.
     *
     * @param chinese text to convert; {@code null} is treated as empty
     * @return one output character per input character, or {@code ""} for null/empty input
     */
    public static String ToFirstChar(String chinese){
        if (chinese == null) {
            return "";
        }
        HanyuPinyinOutputFormat format = newLowercaseTonelessFormat();
        StringBuilder initials = new StringBuilder(chinese.length());
        for (char c : chinese.toCharArray()) {
            // readingsOf never returns an empty list or an empty reading.
            initials.append(readingsOf(c, format).get(0).charAt(0));
        }
        return initials.toString();
    }

    /**
     * Converts Chinese text to pinyin, ignoring heteronyms: only the first reading
     * reported by pinyin4j is used for each character.
     * Characters without a pinyin reading are kept as they are.
     *
     * @param chinese text to convert; {@code null} is treated as empty
     * @return the concatenated pinyin, or {@code ""} for null/empty input
     */
    public static String ToPinyin1(String chinese){
        if (chinese == null) {
            return "";
        }
        HanyuPinyinOutputFormat format = newLowercaseTonelessFormat();
        StringBuilder pinyin = new StringBuilder();
        for (char c : chinese.toCharArray()) {
            pinyin.append(readingsOf(c, format).get(0));
        }
        return pinyin.toString();
    }

    /**
     * Converts Chinese text to pinyin, expanding heteronyms: every combination of the
     * readings of every character is produced.
     *
     * <p>The number of combinations is the product of the reading counts of all
     * characters, so it grows exponentially with the number of heteronyms in the input.
     *
     * @param chinese text to convert; {@code null} is treated as empty
     * @return the {@code toString()} form of the set of combinations, e.g. {@code "[a, b]"};
     *         the order of the entries is unspecified. {@code "[]"} for null/empty input
     */
    public static String ToPinyin2(String chinese){
        List<List<String>> readingsPerChar = new ArrayList<>();
        if (chinese != null) {
            HanyuPinyinOutputFormat format = newLowercaseTonelessFormat();
            for (char c : chinese.toCharArray()) {
                readingsPerChar.add(readingsOf(c, format));
            }
        }
        return strArray(readingsPerChar).toString();
    }

    /**
     * Tells whether the given string contains at least one of the punctuation
     * characters that are copied through verbatim during conversion.
     *
     * @param s string to inspect; {@code null} yields {@code false}
     * @return {@code true} if a listed punctuation character occurs anywhere in {@code s}
     */
    public static boolean regEx(String s){
        return s != null && PUNCTUATION.matcher(s).find();
    }

    /**
     * Builds every string obtainable by picking one element from each inner list,
     * in order, and concatenating the picks.
     *
     * <p>Null or empty inner lists contribute nothing and are skipped, so they do not
     * discard the combinations accumulated from the preceding lists.
     *
     * @param list one list of alternatives per position; may be {@code null}
     * @return the set of combinations; empty if there are no alternatives at all
     */
    public static Set<String> strArray(List<List<String>> list){
        Set<String> combinations = new HashSet<String>();
        if (list == null) {
            return combinations;
        }
        for (List<String> alternatives : list) {
            if (alternatives == null || alternatives.isEmpty()) {
                continue;
            }
            combinations = splice(alternatives, combinations);
        }
        return combinations;
    }

    /**
     * Appends each alternative in {@code item} to each prefix in {@code set}.
     * When {@code set} is empty the alternatives themselves become the result.
     */
    private static Set<String> splice(List<String> item, Set<String> set) {
        Set<String> result = new HashSet<String>();
        for (String suffix : item) {
            if (set.isEmpty()) {
                result.add(suffix);
                continue;
            }
            for (String prefix : set) {
                result.add(prefix + suffix);
            }
        }
        return result;
    }

    /** Creates the output format shared by all conversions: lowercase, no tone marks. */
    private static HanyuPinyinOutputFormat newLowercaseTonelessFormat() {
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        return format;
    }

    /**
     * Returns the pinyin readings of a single character, in the order pinyin4j reports them.
     * Falls back to the character itself when it is ASCII, listed punctuation, or has no
     * reading, so the returned list is never empty and never contains an empty string.
     */
    private static List<String> readingsOf(char c, HanyuPinyinOutputFormat format) {
        List<String> readings = new ArrayList<>();
        if (c > PASS_THROUGH_LIMIT && !regEx(String.valueOf(c))) {
            try {
                // pinyin4j is queried once per character; it returns null when it has no reading.
                String[] pinyin = PinyinHelper.toHanyuPinyinStringArray(c, format);
                if (pinyin != null) {
                    for (String reading : pinyin) {
                        if (reading != null && !reading.isEmpty()) {
                            readings.add(reading);
                        }
                    }
                }
            } catch (BadHanyuPinyinOutputFormatCombination e) {
                logger.error("汉字转拼音异常:" + c, e);
            }
        }
        if (readings.isEmpty()) {
            readings.add(String.valueOf(c));
        }
        return readings;
    }

    /**
     * Demo entry point.
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println(ToFirstChar("汉字转换为拼音").toUpperCase()); // initials, upper-cased
        System.out.println("结果:"+ToPinyin2("解和景都是多音字(小)"));
    }
}

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值