.net 汉字转拼音首字母

Program.cs

using amazingdemo.common;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace amazingdemo
{
    /// <summary>
    /// Console demo: groups a handful of Latin and Chinese names by initial letter
    /// and prints a few of the resulting groups.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Builds the sample name list, groups it with <c>SortHelper.sort</c>, prints the
        /// A, B, C, D and Y groups, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            SortHelper grouper = new SortHelper();
            Console.WriteLine("======================");

            ArrayList names = new ArrayList { "adisen", "bulsi", "Kobe", "布丁", "杜甫", "元方" };
            IDictionary groups = grouper.sort(names);
            Console.WriteLine("-------分组后的输出-----------");

            PrintGroup(groups, "A分组:", "A");
            PrintGroup(groups, "B分组:", "B");
            PrintGroup(groups, "C分组:", "C");
            PrintGroup(groups, "D分组:", "D");
            PrintGroup(groups, "Y分组:", "Y");

            Console.ReadKey();
        }

        /// <summary>
        /// Writes the heading, then every member stored under <paramref name="key"/>, one per line.
        /// </summary>
        /// <param name="groups">Dictionary returned by <c>SortHelper.sort</c>.</param>
        /// <param name="heading">Line printed before the members.</param>
        /// <param name="key">Group key to look up.</param>
        private static void PrintGroup(IDictionary groups, string heading, string key)
        {
            // The heading goes out before the lookup, matching the original statement order.
            Console.WriteLine(heading);
            ArrayList members = (ArrayList)groups[key];
            foreach (object member in members)
            {
                Console.WriteLine(member);
            }
        }
    }
}

SortHelper.cs

using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Security.Policy;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace amazingdemo.common
{
    /// <summary>
    /// Maps characters to the first letter of their pinyin by comparing the character's
    /// double-byte "GBK" code against a table of boundary characters, and groups strings
    /// into "A".."Z" buckets by the letter of their first character.
    /// </summary>
    /// <remarks>
    /// The lookup needs the "GBK" encoding. If it cannot be resolved (for example on .NET Core
    /// without the System.Text.Encoding.CodePages provider registered), the failure is written
    /// to the console once at construction and every non-Latin character maps to <c>'0'</c>
    /// (no letter) instead of being mis-reported as 'A'.
    /// </remarks>
    public class SortHelper
    {
        private const int LetterCount = 26;

        // Boundary characters, one per letter plus a closing one: 27 values in total because
        // Z uses two labels (its start and its inclusive end).
        // I, U and V never start a pinyin syllable, so they repeat the previous letter's boundary.
        private static readonly char[] chartable =
        {
            '啊', '芭', '擦', '搭', '蛾', '发', '噶', '哈', '哈',
            '击', '喀', '垃', '妈', '拿', '哦', '啪', '期', '然',
            '撒', '塌', '塌', '塌', '挖', '昔', '压', '匝', '座'
        };

        private static readonly char[] alphatableb =
        {
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I',
            'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
        };

        private static readonly char[] alphatables =
        {
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
            'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
        };

        // Compiled once instead of on every IsChineseCharacters call.
        private static readonly Regex chineseCharPattern = new Regex("[\u4e00-\u9fa5]");

        // Null when the "GBK" encoding is not available on this runtime.
        private readonly Encoding gbkEncoding;

        // table[i] is the double-byte code of chartable[i]; all zeros when GBK is unavailable.
        private readonly int[] table = new int[chartable.Length];

        /// <summary>
        /// Resolves the "GBK" encoding once and pre-computes the code of every boundary character.
        /// </summary>
        public SortHelper()
        {
            try
            {
                gbkEncoding = Encoding.GetEncoding("GBK");
            }
            catch (ArgumentException e)
            {
                // Report once here rather than once per converted character.
                Console.Write(e.ToString());
            }
            catch (NotSupportedException e)
            {
                Console.Write(e.ToString());
            }

            for (int i = 0; i < table.Length; ++i)
            {
                table[i] = gbValue(chartable[i]);
            }
        }

        /// <summary>
        /// Converts one character to a letter.
        /// </summary>
        /// <param name="ch">Character to convert.</param>
        /// <param name="type">
        /// <c>"b"</c> selects upper-case letters for table lookups; any other value (including
        /// null) selects lower-case. ASCII letters ignore this and always come back upper-case.
        /// </param>
        /// <returns>
        /// The upper-cased input for ASCII letters; the table letter for characters whose code
        /// falls inside the boundary table; <c>'0'</c> for everything else.
        /// </returns>
        protected virtual char Char2Alpha(char ch, string type)
        {
            // ASCII letters are returned upper-case so that grouping keys line up.
            if (ch >= 'a' && ch <= 'z')
            {
                return (char) (ch - 'a' + 'A');
            }

            if (ch >= 'A' && ch <= 'Z')
            {
                return ch;
            }

            int gb = gbValue(ch);

            // 0 means "no double-byte code". It must be rejected explicitly: when the table
            // is all zeros (encoding unavailable) a 0 would otherwise match the first entry.
            if (gb == 0 || gb < table[0])
            {
                return '0';
            }

            for (int i = 0; i < LetterCount; ++i)
            {
                if (match(i, gb))
                {
                    return "b".Equals(type) ? alphatableb[i] : alphatables[i];
                }
            }

            return '0';
        }

        /// <summary>
        /// Converts every character of a string with <see cref="Char2Alpha"/> and concatenates
        /// the results.
        /// </summary>
        /// <param name="SourceStr">Text to convert; null or empty yields an empty string.</param>
        /// <param name="type">Letter-case selector, see <see cref="Char2Alpha"/>.</param>
        /// <returns>One output character per input character.</returns>
        public virtual string String2Alpha(string SourceStr, string type)
        {
            if (string.IsNullOrEmpty(SourceStr))
            {
                return "";
            }

            StringBuilder result = new StringBuilder(SourceStr.Length);
            foreach (char ch in SourceStr)
            {
                result.Append(Char2Alpha(ch, type));
            }

            return result.ToString();
        }

        /// <summary>
        /// Converts only the first character of a string with <see cref="Char2Alpha"/>.
        /// </summary>
        /// <param name="SourceStr">Text to inspect; null or empty yields an empty string.</param>
        /// <param name="type">Letter-case selector, see <see cref="Char2Alpha"/>.</param>
        /// <returns>A one-character string, or an empty string when there is no first character.</returns>
        protected virtual string String2AlphaFirst(string SourceStr, string type)
        {
            // Explicit guard instead of catching the exception raised by SourceStr[0].
            if (string.IsNullOrEmpty(SourceStr))
            {
                return "";
            }

            return Char2Alpha(SourceStr[0], type).ToString();
        }

        /// <summary>
        /// Tells whether code <paramref name="gb"/> lies in the range that starts at boundary
        /// <paramref name="i"/> and ends at the next boundary with a different code.
        /// </summary>
        private bool match(int i, int gb)
        {
            if (gb < table[i])
            {
                return false;
            }

            // Skip boundaries that repeat this one (the I/U/V placeholders).
            int j = i + 1;
            while (j < LetterCount && table[j] == table[i])
            {
                ++j;
            }

            // Z uses two labels: its upper bound is the last table entry and is inclusive.
            return j == LetterCount ? gb <= table[j] : gb < table[j];
        }

        /// <summary>
        /// Returns the two GBK bytes of a character packed as <c>(first &lt;&lt; 8) | second</c>,
        /// or 0 when the encoding is unavailable or the character encodes to fewer than two bytes.
        /// </summary>
        private int gbValue(char ch)
        {
            if (gbkEncoding == null)
            {
                return 0;
            }

            byte[] bytes = gbkEncoding.GetBytes(new[] { ch });
            if (bytes.Length < 2)
            {
                return 0;
            }

            return (bytes[0] << 8) | bytes[1];
        }

        /// <summary>
        /// Groups the string form of every item under the upper-case letter of its first character.
        /// </summary>
        /// <param name="list">Items to group; each non-null item is converted with <c>ToString()</c>.</param>
        /// <returns>
        /// A dictionary with exactly the keys "A".."Z", each holding an <see cref="ArrayList"/> of
        /// strings in input order. Null items and items whose first character maps to no letter
        /// are left out.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
        public virtual IDictionary sort(IList list)
        {
            if (list == null)
            {
                throw new ArgumentNullException("list");
            }

            // Every letter gets a (possibly empty) bucket so callers can index any key safely.
            IDictionary map = new Hashtable();
            foreach (char letter in alphatableb)
            {
                map[letter.ToString()] = new ArrayList();
            }

            // Single pass: each item's letter is computed once rather than once per bucket.
            foreach (object item in list)
            {
                if (item == null)
                {
                    continue;
                }

                string text = item.ToString();
                string key = String2AlphaFirst(text, "b");
                if (key == null)
                {
                    continue;
                }

                ArrayList bucket = (ArrayList) map[key];
                if (bucket != null)
                {
                    bucket.Add(text);
                }
            }

            return map;
        }

        /*------------------------------Method two---------------------------------*/
        /// <summary>
        /// Maps a zone-position code (zone * 100 + position) to a lower-case first letter.
        /// </summary>
        /// <param name="nCode">Zone-position code of the character.</param>
        /// <returns>The letter, or an empty string when the code is outside every range.</returns>
        private string FirstLetter(int nCode)
        {
            if (nCode >= 1601 && nCode < 1637) return "a";
            if (nCode >= 1637 && nCode < 1833) return "b";
            if (nCode >= 1833 && nCode < 2078) return "c";
            if (nCode >= 2078 && nCode < 2274) return "d";
            if (nCode >= 2274 && nCode < 2302) return "e";
            if (nCode >= 2302 && nCode < 2433) return "f";
            if (nCode >= 2433 && nCode < 2594) return "g";
            if (nCode >= 2594 && nCode < 2787) return "h";
            if (nCode >= 2787 && nCode < 3106) return "j";
            if (nCode >= 3106 && nCode < 3212) return "k";
            if (nCode >= 3212 && nCode < 3472) return "l";
            if (nCode >= 3472 && nCode < 3635) return "m";
            if (nCode >= 3635 && nCode < 3722) return "n";
            if (nCode >= 3722 && nCode < 3730) return "o";
            if (nCode >= 3730 && nCode < 3858) return "p";
            if (nCode >= 3858 && nCode < 4027) return "q";
            if (nCode >= 4027 && nCode < 4086) return "r";
            if (nCode >= 4086 && nCode < 4390) return "s";
            if (nCode >= 4390 && nCode < 4558) return "t";
            if (nCode >= 4558 && nCode < 4684) return "w";
            if (nCode >= 4684 && nCode < 4925) return "x";
            if (nCode >= 4925 && nCode < 5249) return "y";
            if (nCode >= 5249 && nCode < 5590) return "z";
            return "";
        }

        /// <summary>
        /// Tells whether the text contains at least one character in U+4E00..U+9FA5.
        /// </summary>
        /// <param name="chrStr">Text to test.</param>
        /// <returns>True when such a character is present.</returns>
        private bool IsChineseCharacters(string chrStr)
        {
            return chineseCharPattern.IsMatch(chrStr);
        }

        /// <summary>
        /// Replaces every Chinese character of the input with its lower-case first letter and
        /// copies all other characters unchanged.
        /// </summary>
        /// <param name="chrStr">Input text; null or empty yields an empty string.</param>
        /// <returns>The converted text.</returns>
        private string ChangeByName(string chrStr)
        {
            if (string.IsNullOrEmpty(chrStr))
            {
                return string.Empty;
            }

            Encoding gb = Encoding.GetEncoding("gb2312");
            StringBuilder head = new StringBuilder(chrStr.Length);
            foreach (char ch in chrStr)
            {
                head.Append(HeadLetter(ch, gb));
            }

            return head.ToString();
        }

        /// <summary>
        /// Returns the lower-case first letter of the first character, or that character itself
        /// when it is not Chinese.
        /// </summary>
        /// <param name="chrStr">Input text; null or empty yields an empty string.</param>
        /// <returns>The converted first character.</returns>
        private string GetOneByName(string chrStr)
        {
            if (string.IsNullOrEmpty(chrStr))
            {
                return string.Empty;
            }

            return HeadLetter(chrStr[0], Encoding.GetEncoding("gb2312"));
        }

        /// <summary>
        /// Shared per-character step of method two: non-Chinese characters pass through,
        /// Chinese ones are looked up by zone-position code.
        /// </summary>
        private string HeadLetter(char ch, Encoding gb)
        {
            string text = ch.ToString();
            if (!IsChineseCharacters(text))
            {
                return text;
            }

            byte[] bytes = gb.GetBytes(text);
            if (bytes.Length < 2)
            {
                // The encoding has no double-byte form for this character: no letter.
                return string.Empty;
            }

            // Plain arithmetic; codes outside the known ranges (including negative ones)
            // simply fall through FirstLetter to an empty string.
            int nCode = (bytes[0] - 0xA0) * 100 + (bytes[1] - 0xA0);
            return FirstLetter(nCode);
        }
        /*------------------------------Method two END---------------------------------*/
    }

    /// <summary>
    /// String helpers: start/end substring, offset prefix test, regex split with optional
    /// trailing-empty trimming, and conversions between strings and signed-byte arrays.
    /// </summary>
    static class StringHelperClass
    {
        /// <summary>Returns the characters from <paramref name="start"/> up to, but not including, <paramref name="end"/>.</summary>
        static string SubstringSpecial(this string self, int start, int end)
        {
            int length = end - start;
            return self.Substring(start, length);
        }

        /// <summary>Tells whether <paramref name="prefix"/> occurs in <paramref name="self"/> exactly at index <paramref name="toffset"/> (ordinal comparison).</summary>
        static bool StartsWith(this string self, string prefix, int toffset)
        {
            int foundAt = self.IndexOf(prefix, toffset, System.StringComparison.Ordinal);
            return foundAt == toffset;
        }

        /// <summary>
        /// Splits on a regular expression. When <paramref name="trimTrailingEmptyStrings"/> is true
        /// and at least one piece is non-empty, empty pieces after the last non-empty one are dropped.
        /// </summary>
        static string[] Split(this string self, string regexDelimiter, bool trimTrailingEmptyStrings)
        {
            string[] pieces = System.Text.RegularExpressions.Regex.Split(self, regexDelimiter);

            if (!trimTrailingEmptyStrings || pieces.Length <= 1)
            {
                return pieces;
            }

            // Walk back over the trailing empty pieces.
            int keep = pieces.Length;
            while (keep > 0 && pieces[keep - 1].Length == 0)
            {
                keep--;
            }

            // keep == 0 means every piece was empty; the array is then returned untouched.
            if (keep > 0 && keep < pieces.Length)
            {
                System.Array.Resize(ref pieces, keep);
            }

            return pieces;
        }

        /// <summary>Decodes the whole array as UTF-8.</summary>
        static string NewString(sbyte[] bytes)
        {
            return NewString(bytes, 0, bytes.Length);
        }

        /// <summary>Decodes <paramref name="count"/> bytes starting at <paramref name="index"/> as UTF-8.</summary>
        static string NewString(sbyte[] bytes, int index, int count)
        {
            byte[] raw = Reinterpret(bytes);
            return System.Text.Encoding.UTF8.GetString(raw, index, count);
        }

        /// <summary>Decodes the whole array with the named encoding.</summary>
        static string NewString(sbyte[] bytes, string encoding)
        {
            return NewString(bytes, 0, bytes.Length, encoding);
        }

        /// <summary>Decodes <paramref name="count"/> bytes starting at <paramref name="index"/> with the named encoding.</summary>
        static string NewString(sbyte[] bytes, int index, int count, string encoding)
        {
            System.Text.Encoding decoder = System.Text.Encoding.GetEncoding(encoding);
            byte[] raw = Reinterpret(bytes);
            return decoder.GetString(raw, index, count);
        }

        /// <summary>Encodes the string as UTF-8 into a signed-byte array.</summary>
        internal static sbyte[] GetBytes(this string self)
        {
            return GetSBytesForEncoding(System.Text.Encoding.UTF8, self);
        }

        /// <summary>Encodes the string with the given encoding into a signed-byte array.</summary>
        internal static sbyte[] GetBytes(this string self, System.Text.Encoding encoding)
        {
            return GetSBytesForEncoding(encoding, self);
        }

        /// <summary>Encodes the string with the named encoding into a signed-byte array.</summary>
        internal static sbyte[] GetBytes(this string self, string encoding)
        {
            System.Text.Encoding resolved = System.Text.Encoding.GetEncoding(encoding);
            return GetSBytesForEncoding(resolved, self);
        }

        /// <summary>Allocates a signed-byte array of the exact encoded size and encodes into it.</summary>
        private static sbyte[] GetSBytesForEncoding(System.Text.Encoding encoding, string s)
        {
            int byteCount = encoding.GetByteCount(s);
            sbyte[] buffer = new sbyte[byteCount];
            encoding.GetBytes(s, 0, s.Length, Reinterpret(buffer), 0);
            return buffer;
        }

        /// <summary>Views the same array object as <c>byte[]</c> (cast through <c>object</c>, no copy).</summary>
        private static byte[] Reinterpret(sbyte[] bytes)
        {
            return (byte[]) (object) bytes;
        }
    }
}

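在 .NET Core(而非 .NET Framework)中使用 GB2312/GBK 编码时会报错:
调用 System.Text.Encoding.GetEncodings() 可以发现,返回的编码列表中没有 GB2312 或 GBK。
解决办法:通过 NuGet 安装 System.Text.Encoding.CodePages 包,并在程序启动时(创建 SortHelper 之前)调用 Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); 注册编码提供程序。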

如图

在这里插入图片描述

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值