获取中文词组的汉语拼音首字母拼接

13 篇文章 1 订阅
10 篇文章 0 订阅

我们需要一个快捷的批量处理功能:获取中文词组中每个汉字的汉语拼音首字母,并按原顺序拼接起来(非汉字字符保持不变)。

比如:

输出功率3:SCGL3

一鸣惊人:YMJR

我们可以采用字符字典法,穷举出所有的汉字【暂只考虑简体中文】

Dictionary<char,string> dict;

比如{'中',"Z"},

{'国',"G"},

{'人',"R"}

拼音Excel库【GBK汉字拼音对照表.xls】如下:

在该文件的属性中,将“复制到输出目录”设置为“始终复制”,这样程序运行时才能在输出目录(程序所在目录)中找到它。

添加对NPOI操作Excel的支持库

NpoiExcelOperateUtil.cs源程序如下:

using NPOI.HSSF.UserModel;
using NPOI.SS.UserModel;
using NPOI.XSSF.UserModel;
using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ChinesePinyinMappingDemo
{
    /// <summary>
    /// Excel表格与DataTable内存数据表的相互转换操作类
    /// 斯内科 2023-08-08
    /// </summary>
    public static class NpoiExcelOperateUtil
    {
        /// <summary>
        /// Loads the first worksheet (index 0) of an Excel file into a <see cref="DataTable"/>.
        /// </summary>
        /// <param name="file">Path of the Excel file; its extension (.xls or .xlsx) selects the NPOI reader.</param>
        /// <returns>
        /// The table built from the first worksheet, or <c>null</c> when the extension is neither
        /// .xls nor .xlsx or when no sheet is returned for index 0.
        /// </returns>
        public static DataTable ExcelToTable(string file)
        {
            string firstSheetName;
            string fileExt = Path.GetExtension(file).ToLowerInvariant();
            using (FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read))
            {
                IWorkbook workbook;
                // XSSFWorkbook reads the .xlsx format, HSSFWorkbook reads the .xls format.
                if (fileExt == ".xlsx")
                {
                    workbook = new XSSFWorkbook(fs);
                }
                else if (fileExt == ".xls")
                {
                    workbook = new HSSFWorkbook(fs);
                }
                else
                {
                    return null;
                }

                ISheet sheet = workbook.GetSheetAt(0);
                if (sheet == null)
                {
                    return null;
                }
                firstSheetName = sheet.SheetName;
            }

            // Delegate only after the stream above has been released, so the file is
            // never held open twice at the same time.
            return ExcelToTable(file, firstSheetName);
        }
        /// <summary>
        /// Loads a single, named worksheet of an Excel file into a <see cref="DataTable"/>
        /// by delegating to the multi-sheet overload with a one-element list.
        /// </summary>
        /// <param name="file">Path of the Excel file.</param>
        /// <param name="sheetName">Name of the worksheet to load.</param>
        /// <returns>
        /// The first table returned by the multi-sheet overload, or <c>null</c> when that
        /// overload returns <c>null</c> or an empty array.
        /// </returns>
        public static DataTable ExcelToTable(string file, string sheetName)
        {
            List<string> requestedSheets = new List<string>();
            requestedSheets.Add(sheetName);

            DataTable[] tables = ExcelToTable(file, requestedSheets);
            if (tables == null || tables.Length == 0)
            {
                return null;
            }
            return tables[0];
        }

        /// <summary>
        /// Loads several worksheets of one Excel file, producing one <see cref="DataTable"/>
        /// per requested sheet name.
        /// </summary>
        /// <remarks>
        /// The first row of each sheet supplies the column names; a blank header cell is named
        /// "Columns" followed by its zero-based index. Data rows in which every cell is empty
        /// are skipped, as are row positions for which the sheet holds no row at all.
        /// </remarks>
        /// <param name="file">Path of the Excel file; its extension (.xls or .xlsx) selects the NPOI reader.</param>
        /// <param name="list_SheetName">Names of the worksheets to load.</param>
        /// <returns>
        /// An array in the same order as <paramref name="list_SheetName"/>. A name with no
        /// matching sheet, or a sheet without a header row, yields an empty table carrying that
        /// name. Returns <c>null</c> when the extension is neither .xls nor .xlsx.
        /// </returns>
        public static DataTable[] ExcelToTable(string file, List<string> list_SheetName)
        {
            int count = list_SheetName.Count;
            DataTable[] dtS = new DataTable[count];
            string fileExt = Path.GetExtension(file).ToLowerInvariant();
            using (FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read))
            {
                IWorkbook workbook;
                // XSSFWorkbook reads the .xlsx format, HSSFWorkbook reads the .xls format.
                if (fileExt == ".xlsx")
                {
                    workbook = new XSSFWorkbook(fs);
                }
                else if (fileExt == ".xls")
                {
                    workbook = new HSSFWorkbook(fs);
                }
                else
                {
                    return null;
                }

                for (int k = 0; k < count; k++)
                {
                    // Register the (still empty) table first so every slot is non-null
                    // even when the sheet is missing or has no content.
                    DataTable dt = new DataTable(list_SheetName[k]);
                    dtS[k] = dt;

                    ISheet sheet = workbook.GetSheet(list_SheetName[k]);
                    if (sheet == null)
                    {
                        continue;
                    }

                    // Header row. GetRow returns null for a sheet without any rows.
                    IRow header = sheet.GetRow(sheet.FirstRowNum);
                    if (header == null)
                    {
                        continue;
                    }

                    int columnCount = header.LastCellNum;
                    for (int i = 0; i < columnCount; i++)
                    {
                        object headerValue = GetValueType(header.GetCell(i));
                        string columnName = headerValue == null ? string.Empty : headerValue.ToString();
                        if (columnName.Length == 0)
                        {
                            columnName = "Columns" + i.ToString();
                        }
                        dt.Columns.Add(new DataColumn(columnName));
                    }

                    // Data rows.
                    for (int i = sheet.FirstRowNum + 1; i <= sheet.LastRowNum; i++)
                    {
                        // Rows that were never written are reported as null; skip the gap
                        // instead of dereferencing it.
                        IRow row = sheet.GetRow(i);
                        if (row == null)
                        {
                            continue;
                        }

                        DataRow dr = dt.NewRow();
                        bool hasValue = false;
                        for (int j = 0; j < columnCount; j++)
                        {
                            dr[j] = GetValueType(row.GetCell(j));
                            if (dr[j] != null && dr[j].ToString() != string.Empty)
                            {
                                hasValue = true;
                            }
                        }
                        if (hasValue)
                        {
                            dt.Rows.Add(dr);
                        }
                    }
                }
            }
            return dtS;
        }

        /// <summary>
        /// Exports a <see cref="DataTable"/> to an Excel file with a single worksheet.
        /// </summary>
        /// <remarks>
        /// Row 0 receives the column names; every data value is written as text via
        /// <c>ToString()</c>. The sheet is named after <c>dt.TableName</c>, or "Sheet1" when
        /// that name is empty. When the extension is neither .xls nor .xlsx nothing is written.
        /// </remarks>
        /// <param name="dt">Table to export.</param>
        /// <param name="file">Destination path; its extension selects the workbook format.</param>
        public static void TableToExcel(DataTable dt, string file)
        {
            IWorkbook workbook;
            string fileExt = Path.GetExtension(file).ToLowerInvariant();
            if (fileExt == ".xlsx")
            {
                // The workbook format has to match the extension: ExcelToTable opens
                // .xlsx files with XSSFWorkbook, which cannot read HSSF (.xls) content.
                workbook = new XSSFWorkbook();
            }
            else if (fileExt == ".xls")
            {
                workbook = new HSSFWorkbook();
            }
            else
            {
                // Unsupported extension: silently do nothing, as before.
                return;
            }

            ISheet sheet = workbook.CreateSheet(string.IsNullOrEmpty(dt.TableName) ? "Sheet1" : dt.TableName);

            // Header row.
            IRow headerRow = sheet.CreateRow(0);
            for (int i = 0; i < dt.Columns.Count; i++)
            {
                headerRow.CreateCell(i).SetCellValue(dt.Columns[i].ColumnName);
            }

            // Data rows, shifted down by one because of the header.
            for (int i = 0; i < dt.Rows.Count; i++)
            {
                IRow dataRow = sheet.CreateRow(i + 1);
                for (int j = 0; j < dt.Columns.Count; j++)
                {
                    dataRow.CreateCell(j).SetCellValue(dt.Rows[i][j].ToString());
                }
            }

            // Serialize into memory first. MemoryStream.ToArray still works if the
            // workbook writer has already closed the stream.
            byte[] buf;
            using (MemoryStream stream = new MemoryStream())
            {
                workbook.Write(stream);
                buf = stream.ToArray();
            }

            // Save as an Excel file, replacing any existing file.
            using (FileStream fs = new FileStream(file, FileMode.Create, FileAccess.Write))
            {
                fs.Write(buf, 0, buf.Length);
                fs.Flush();
            }
        }

        /// <summary>
        /// Reads the content of a cell as an object whose runtime type depends on the cell type.
        /// </summary>
        /// <param name="cell">Cell to read; may be <c>null</c>.</param>
        /// <returns>
        /// <c>null</c> for a null or blank cell; the boolean, numeric, string or error value for
        /// cells of those types; for every other cell type (including formulas) the formula
        /// text prefixed with "=".
        /// </returns>
        private static object GetValueType(ICell cell)
        {
            if (cell == null)
            {
                return null;
            }

            CellType kind = cell.CellType;
            if (kind == CellType.Blank)
            {
                return null;
            }
            if (kind == CellType.Boolean)
            {
                return cell.BooleanCellValue;
            }
            if (kind == CellType.Numeric)
            {
                return cell.NumericCellValue;
            }
            if (kind == CellType.String)
            {
                return cell.StringCellValue;
            }
            if (kind == CellType.Error)
            {
                return cell.ErrorCellValue;
            }

            // Formula cells and any remaining cell type: expose the formula text.
            return "=" + cell.CellFormula;
        }
    }
}

将默认的Form1重命名为FormChinesePinyinMapping,

FormChinesePinyinMapping设计器代码如下:

文件:FormChinesePinyinMapping.Designer.cs


namespace ChinesePinyinMappingDemo
{
    // Designer half of the FormChinesePinyinMapping partial class: declares the
    // controls and lays them out in InitializeComponent.
    partial class FormChinesePinyinMapping
    {
        /// <summary>
        /// Required designer variable.
        /// </summary>
        private System.ComponentModel.IContainer components = null;

        /// <summary>
        /// Clean up any resources being used.
        /// </summary>
        /// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
        protected override void Dispose(bool disposing)
        {
            if (disposing && (components != null))
            {
                components.Dispose();
            }
            base.Dispose(disposing);
        }

        #region Windows 窗体设计器生成的代码

        /// <summary>
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
            this.rtxtChinese = new System.Windows.Forms.RichTextBox();
            this.btnGetPinyin = new System.Windows.Forms.Button();
            this.label1 = new System.Windows.Forms.Label();
            this.rtxtResult = new System.Windows.Forms.RichTextBox();
            this.label2 = new System.Windows.Forms.Label();
            this.SuspendLayout();
            // 
            // rtxtChinese (input text box on the left)
            // 
            this.rtxtChinese.Location = new System.Drawing.Point(12, 43);
            this.rtxtChinese.Name = "rtxtChinese";
            this.rtxtChinese.Size = new System.Drawing.Size(536, 677);
            this.rtxtChinese.TabIndex = 0;
            this.rtxtChinese.Text = "";
            // 
            // btnGetPinyin (its Click event is wired to btnGetPinyin_Click)
            // 
            this.btnGetPinyin.Location = new System.Drawing.Point(576, 57);
            this.btnGetPinyin.Name = "btnGetPinyin";
            this.btnGetPinyin.Size = new System.Drawing.Size(75, 23);
            this.btnGetPinyin.TabIndex = 1;
            this.btnGetPinyin.Text = "获取拼音";
            this.btnGetPinyin.UseVisualStyleBackColor = true;
            this.btnGetPinyin.Click += new System.EventHandler(this.btnGetPinyin_Click);
            // 
            // label1 (caption above rtxtChinese)
            // 
            this.label1.AutoSize = true;
            this.label1.Location = new System.Drawing.Point(12, 19);
            this.label1.Name = "label1";
            this.label1.Size = new System.Drawing.Size(77, 12);
            this.label1.TabIndex = 2;
            this.label1.Text = "汉语拼接段落";
            // 
            // rtxtResult (output text box on the right)
            // 
            this.rtxtResult.Location = new System.Drawing.Point(688, 43);
            this.rtxtResult.Name = "rtxtResult";
            this.rtxtResult.Size = new System.Drawing.Size(536, 677);
            this.rtxtResult.TabIndex = 3;
            this.rtxtResult.Text = "";
            // 
            // label2 (caption above rtxtResult)
            // 
            this.label2.AutoSize = true;
            this.label2.Location = new System.Drawing.Point(686, 19);
            this.label2.Name = "label2";
            this.label2.Size = new System.Drawing.Size(101, 12);
            this.label2.TabIndex = 4;
            this.label2.Text = "拼音与拼音首字母";
            // 
            // FormChinesePinyinMapping
            // 
            this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 12F);
            this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
            this.ClientSize = new System.Drawing.Size(1254, 732);
            this.Controls.Add(this.label2);
            this.Controls.Add(this.rtxtResult);
            this.Controls.Add(this.label1);
            this.Controls.Add(this.btnGetPinyin);
            this.Controls.Add(this.rtxtChinese);
            this.Name = "FormChinesePinyinMapping";
            this.Text = "汉字拼音对照表";
            this.ResumeLayout(false);
            this.PerformLayout();

        }

        #endregion

        private System.Windows.Forms.RichTextBox rtxtChinese;
        private System.Windows.Forms.Button btnGetPinyin;
        private System.Windows.Forms.Label label1;
        private System.Windows.Forms.RichTextBox rtxtResult;
        private System.Windows.Forms.Label label2;
    }
}

窗体FormChinesePinyinMapping代码如下:

文件:FormChinesePinyinMapping.cs

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace ChinesePinyinMappingDemo
{
    public partial class FormChinesePinyinMapping : Form
    {
        /// <summary>
        /// Creates the form: runs the designer-generated initialization and pre-fills the
        /// input box <c>rtxtChinese</c> with a multi-line sample text.
        /// </summary>
        public FormChinesePinyinMapping()
        {
            InitializeComponent();
            // Sample input mixing Chinese characters, digits, Latin letters and punctuation.
            // The literal is verbatim (@"..."), so its line breaks are part of the text.
            rtxtChinese.Text = @"中国必须统一!
输出功率3
Nice!
上古十大神器:
东皇钟、昊天塔、盘古斧、轩辕剑、炼妖壶、
伏羲琴、神农鼎、崆峒印、昆仑镜、女娲石";
        }

        /// <summary>
        /// Click handler: loads the character/pinyin table from "GBK汉字拼音对照表.xls" in the
        /// application base directory, then writes to <c>rtxtResult</c> the pinyin initials of
        /// every input line, a separator line, and the full pinyin of every input line.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnGetPinyin_Click(object sender, EventArgs e)
        {
            rtxtResult.Clear();
            string fileName = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "GBK汉字拼音对照表.xls");
            if (!System.IO.File.Exists(fileName))
            {
                // Tell the user instead of letting FileNotFoundException escape the handler.
                MessageBox.Show($"找不到拼音对照表文件:{fileName}");
                return;
            }

            DataTable dtImport = NpoiExcelOperateUtil.ExcelToTable(fileName);
            // ExcelToTable can return null, and the two columns read below must exist.
            if (dtImport == null || !dtImport.Columns.Contains("汉字") || !dtImport.Columns.Contains("第一读音"))
            {
                MessageBox.Show("拼音对照表格式不正确:缺少【汉字】或【第一读音】列");
                return;
            }

            Dictionary<char, Tuple<string, string>> chineseDictionary = BuildPinyinDictionary(dtImport);

            string[] lines = rtxtChinese.Lines;
            foreach (string line in lines)
            {
                rtxtResult.AppendText($"{GetSerialPinyin(line, chineseDictionary)}\n");
            }
            rtxtResult.AppendText("----------获取拼音整体步骤------------\n");
            foreach (string line in lines)
            {
                rtxtResult.AppendText($"{GetSerialFullPinyin(line, chineseDictionary)}\n");
            }
        }

        /// <summary>
        /// Builds the lookup from a character to its (first reading, first letter of that reading).
        /// </summary>
        /// <param name="table">Table containing the columns "汉字" and "第一读音".</param>
        /// <returns>
        /// Dictionary keyed by the first character of the trimmed "汉字" cell. Rows whose
        /// "汉字" cell is blank are skipped, and the first row wins when a character repeats,
        /// so malformed rows cannot raise a duplicate-key exception.
        /// </returns>
        private static Dictionary<char, Tuple<string, string>> BuildPinyinDictionary(DataTable table)
        {
            Dictionary<char, Tuple<string, string>> result = new Dictionary<char, Tuple<string, string>>(table.Rows.Count);
            foreach (DataRow dr in table.Rows)
            {
                string chinese = dr["汉字"].ToString().Trim();
                if (chinese.Length == 0 || result.ContainsKey(chinese[0]))
                {
                    continue;
                }

                string firstReading = dr["第一读音"].ToString().Trim();
                string initial = firstReading.Length > 0 ? firstReading.Substring(0, 1) : "";
                result.Add(chinese[0], Tuple.Create(firstReading, initial));
            }
            return result;
        }

        /// <summary>
        /// Returns the upper-cased pinyin initial of a character, or the character itself when
        /// no usable initial is known (letters, digits, punctuation, or a dictionary entry
        /// whose initial is blank).
        /// </summary>
        /// <param name="input">Character to convert.</param>
        /// <param name="chineseDictionary">Lookup from character to (first reading, initial).</param>
        /// <returns>The upper-cased initial, or <paramref name="input"/> as a string.</returns>
        private string GetFirstChar(char input, Dictionary<char, Tuple<string, string>> chineseDictionary) 
        {
            Tuple<string, string> entry;
            // Single lookup. A blank initial falls through, so the character is kept
            // rather than silently removed from the output.
            if (chineseDictionary.TryGetValue(input, out entry) && entry != null && !string.IsNullOrEmpty(entry.Item2))
            {
                // Invariant casing keeps the result independent of the UI culture.
                return entry.Item2.ToUpperInvariant();
            }
            return input.ToString();
        }

        /// <summary>
        /// Converts a line of text character by character with <c>GetFirstChar</c> and
        /// concatenates the results in order.
        /// </summary>
        /// <param name="src">Text to convert; may be null or empty.</param>
        /// <param name="chineseDictionary">Lookup from character to (first reading, initial).</param>
        /// <returns>The concatenated result, or an empty string for null or empty input.</returns>
        private string GetSerialPinyin(string src, Dictionary<char, Tuple<string, string>> chineseDictionary) 
        {
            return string.IsNullOrEmpty(src)
                ? string.Empty
                : string.Concat(src.Select(ch => GetFirstChar(ch, chineseDictionary)));
        }

        /// <summary>
        /// Replaces every character of a line that has a known reading with that reading
        /// (first letter upper-cased) and concatenates the results in order.
        /// </summary>
        /// <param name="src">Text to convert; may be null or empty.</param>
        /// <param name="chineseDictionary">Lookup from character to (first reading, initial).</param>
        /// <returns>
        /// The converted text, or an empty string for null or empty input. Characters that are
        /// not in the dictionary, or whose reading is blank, are copied unchanged.
        /// </returns>
        private string GetSerialFullPinyin(string src, Dictionary<char, Tuple<string, string>> chineseDictionary)
        {
            if (string.IsNullOrEmpty(src))
            {
                return string.Empty;
            }

            StringBuilder sb = new StringBuilder();
            foreach (char ch in src)
            {
                Tuple<string, string> entry;
                // Single lookup. A blank reading falls through, so the character is
                // kept instead of being replaced by an empty string.
                if (chineseDictionary.TryGetValue(ch, out entry) && entry != null && !string.IsNullOrEmpty(entry.Item1))
                {
                    string fullPinyin = entry.Item1;
                    // Capitalize the first letter only; invariant casing avoids culture surprises.
                    sb.Append(fullPinyin.Substring(0, 1).ToUpperInvariant());
                    sb.Append(fullPinyin, 1, fullPinyin.Length - 1);
                }
                else
                {
                    sb.Append(ch);
                }
            }
            return sb.ToString();
        }
    }
}

测试运行如图:

(我们发现:“女娲石”的拼音首字母应为NWS,但程序输出为NWD。原因是“石”是多音字(shí/dàn),对照表中它的第一读音是dan,而程序只取第一读音,因此多音字的结果可能不正确。)

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

斯内科

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值