C#实现的字典程序

 

这是在网上看到的代码,折腾了一会儿。开始的时候觉得用 hash 表效率比较高;但是修改完代码之后,发现内存占用太多:50 万条记录要占用 50 多 MB 的内存。而直接使用 stream 或者 MMF(内存映射文件)的话,内存只有 3 MB 左右,而效率并没有差很多,都是毫秒级的(使用哈希表时,耗时一直为 0ms)。

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.IO.MemoryMappedFiles;
using System.Diagnostics;

namespace ConsoleApplication1
{
    /// <summary>
    /// Console front end: opens the dictionary files and answers look-ups typed on standard input.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Opens the dictionary for querying and runs a read/look-up/print loop
        /// until standard input is exhausted.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Build-mode instance; only used by the sample code below when it is uncommented.
            Dict dict = new Dict("病症");

            // Uncomment to generate sample dictionary files before querying.
            //dict.Add("中国", "China");
            //dict.Add("北京", "beijing");
            //dict.Add("中国人", "Chinese");
            //for (int i = 0; i < 100000; i++)
            //{
            //    dict.Add("中国人1" + i, "Chinese1" + i);
            //    dict.Add("中国人2" + i, "Chinese2" + i);
            //    dict.Add("中国人3" + i, "Chinese3" + i);
            //    dict.Add("中国人4" + i, "Chinese4" + i);

            //}            

            //dict.Save();

            // Query-mode instance: reads the files on disk.
            dict = new Dict();

            while (true)
            {
                Console.Write("请输入词语:");
                var w = Console.ReadLine();
                if (w == null)
                {
                    // ReadLine returns null at end of input (closed or redirected stdin);
                    // without this check the loop would spin forever.
                    break;
                }

                // Time only the look-up itself, not the console output.
                Stopwatch sw = Stopwatch.StartNew();
                string description = dict.GetDescription(w);
                sw.Stop();

                Console.WriteLine("找到词语:");
                Console.WriteLine(description);
                Console.WriteLine("耗时:" + sw.ElapsedMilliseconds + "ms");

            }
        }
    }

    /// <summary>
    /// A word dictionary stored in three files: an info file (name, word count,
    /// total data length), an index file of fixed-size records sorted by word, and
    /// a data file holding the UTF-8 descriptions back to back.
    /// Use <see cref="Dict(string)"/> plus <see cref="Add"/> and <see cref="Save"/> to
    /// build the files, and <see cref="Dict()"/> plus <see cref="GetDescription"/> to query them.
    /// </summary>
    class Dict : IDisposable
    {
        /// <summary>
        /// Length in bytes of the zero-padded UTF-8 word field of an index record.
        /// </summary>
        const int WordFieldSize = 128;

        /// <summary>
        /// Size in bytes of one index record: word field + 4-byte offset + 4-byte data size.
        /// </summary>
        const int IndexRecordSize = WordFieldSize + 4 + 4;

        DictInfo info;
        SortedList<string, DictIndex> indexs = new SortedList<string, DictIndex>();
        List<DictWord> words = new List<DictWord>();

        /// <summary>
        /// Index file
        /// </summary>
        string idxFile = "dic.idx";

        /// <summary>
        /// Data file
        /// </summary>
        string dictfile = "dic.dict";

        /// <summary>
        /// Dictionary info file
        /// </summary>
        string ifoFile = "dic.ifo";

        MemoryMappedFile idxMMFReader;
        MemoryMappedFile dictMMFReader;
        MemoryMappedViewStream idixStream;
        private MemoryMappedViewStream dictStream;

        /// <summary>
        /// Opens the existing dictionary files for querying.
        /// </summary>
        public Dict()
        {
            LoadDictInfo();
            try
            {
                idxMMFReader = MemoryMappedFile.CreateFromFile(idxFile, FileMode.Open);
                idixStream = idxMMFReader.CreateViewStream();
                dictMMFReader = MemoryMappedFile.CreateFromFile(dictfile, FileMode.Open);
                dictStream = dictMMFReader.CreateViewStream();
            }
            catch
            {
                // Do not leak the mappings that were already opened if a later one fails.
                Dispose();
                throw;
            }
        }

        /// <summary>
        /// Creates an empty in-memory dictionary to be filled with <see cref="Add"/>
        /// and written out with <see cref="Save"/>. No files are opened.
        /// </summary>
        /// <param name="name">Dictionary name stored in the info file.</param>
        public Dict(string name)
        {
            info = new DictInfo { BookName = name, WordCount = 0, CurrentOffset = 0 };
            indexs = new SortedList<string, DictIndex>();
            words = new List<DictWord>();
        }

        /// <summary>
        /// Looks up a word by binary search over the index file.
        /// </summary>
        /// <param name="word">Word to look up.</param>
        /// <returns>
        /// "[word]" followed by a newline and the description. When there is no exact
        /// match, the last index entry probed by the search is returned instead
        /// (behavior kept from the original implementation).
        /// </returns>
        public string GetDescription(string word)
        {
            var low = 0;
            // Last valid record is WordCount - 1; the record at WordCount does not exist.
            var high = info.WordCount - 1;
            DictIndex w = new DictIndex();
            while (low <= high)
            {
                var mid = low + (high - low) / 2;
                w = GetWordIndex(mid);

                // NOTE(review): string.Compare and the SortedList default comparer used when
                // building the index both appear to be culture-sensitive; an index built under
                // one culture may not search correctly under another - confirm.
                var order = string.Compare(w.Word, word);
                if (order > 0)
                {
                    high = mid - 1;
                }
                else if (order < 0)
                {
                    low = mid + 1;
                }
                else
                {
                    break;
                }
            }

            return "[" + w.Word + "]\n" + GetWordDescription(w);
        }

        /// <summary>
        /// Reads the index record at the given position.
        /// </summary>
        /// <param name="wordIndex">Zero-based record number in the index file.</param>
        /// <returns>The decoded index entry (word, data offset, data size).</returns>
        public DictIndex GetWordIndex(int wordIndex)
        {
            byte[] record = new byte[IndexRecordSize];

            // Multiply as long so large record numbers cannot overflow int.
            idixStream.Seek((long)wordIndex * IndexRecordSize, SeekOrigin.Begin);
            ReadFully(idixStream, record, record.Length);

            var dicIndex = new DictIndex();
            // The word field is zero-padded; strip the padding after decoding.
            dicIndex.Word = Encoding.UTF8.GetString(record, 0, WordFieldSize).Replace("\0", "");
            dicIndex.Offset = BitConverter.ToInt32(record, WordFieldSize);
            dicIndex.DataSize = BitConverter.ToInt32(record, WordFieldSize + 4);
            return dicIndex;
        }

        /// <summary>
        /// Reads the description that an index entry points to.
        /// </summary>
        /// <param name="dictIndex">Index entry giving the offset and size in the data file.</param>
        /// <returns>The description decoded as UTF-8.</returns>
        public string GetWordDescription(DictIndex dictIndex)
        {
            dictStream.Seek(dictIndex.Offset, SeekOrigin.Begin);
            byte[] data = new byte[dictIndex.DataSize];

            // Read from the data stream; the original read from the index stream here,
            // which returned bytes of the following index records instead of the description.
            ReadFully(dictStream, data, data.Length);
            return Encoding.UTF8.GetString(data).Replace("\0", "");
        }

        /// <summary>
        /// Adds a word and its description to the in-memory dictionary.
        /// </summary>
        /// <param name="word">Word (index key); must not already have been added.</param>
        /// <param name="description">Description text, stored as UTF-8.</param>
        public void Add(string word, string description)
        {
            int dataSize = Encoding.UTF8.GetByteCount(description);

            // Insert into the index first: SortedList.Add throws on a duplicate key, and
            // failing before anything else is touched keeps words/offsets consistent.
            indexs.Add(word, new DictIndex { DataSize = dataSize, Offset = info.CurrentOffset, Word = word });
            words.Add(new DictWord() { Description = description });

            // One more word; the next description starts right after this one.
            info.WordCount++;
            info.CurrentOffset += dataSize;
        }

        /// <summary>
        /// Loads the dictionary info (name, word count, current offset) from the info file.
        /// The three values are expected on the first three lines, in that order.
        /// </summary>
        void LoadDictInfo()
        {
            var infos = File.ReadAllLines(ifoFile);
            info = new DictInfo
            {
                BookName = infos[0].Replace("BookName=", "").Trim(),
                WordCount = int.Parse(infos[1].Replace("WordCount=", "").Trim()),
                CurrentOffset = int.Parse(infos[2].Replace("CurrentOffset=", "").Trim()),
            };
        }

        /// <summary>
        /// Writes the info, data and index files, replacing any existing content.
        /// </summary>
        public void Save()
        {
            StringBuilder dicBuilder = new StringBuilder();
            dicBuilder.AppendLine(string.Format("BookName={0}", info.BookName));
            dicBuilder.AppendLine(string.Format("WordCount={0}", info.WordCount));
            dicBuilder.AppendLine(string.Format("CurrentOffset={0}", info.CurrentOffset));
            File.WriteAllText(ifoFile, dicBuilder.ToString(), Encoding.UTF8);

            // FileMode.Create truncates an existing file; OpenOrCreate would leave stale
            // bytes behind whenever the new content is shorter than the old.
            using (BinaryWriter dataWriter = new BinaryWriter(File.Open(dictfile, FileMode.Create)))
            {
                // Descriptions are written in insertion order, matching the offsets
                // assigned in Add.
                foreach (var word in words)
                {
                    dataWriter.Write(Encoding.UTF8.GetBytes(word.Description));
                }
            }

            using (BinaryWriter idxWriter = new BinaryWriter(File.Open(idxFile, FileMode.Create)))
            {
                // Records are written in the key order maintained by the SortedList.
                foreach (var index in indexs)
                {
                    // Word field: UTF-8 bytes, zero-padded, cut off at WordFieldSize bytes.
                    byte[] wordField = new byte[WordFieldSize];
                    var wordData = Encoding.UTF8.GetBytes(index.Key);
                    Array.Copy(wordData, wordField, Math.Min(WordFieldSize, wordData.Length));

                    idxWriter.Write(wordField);
                    idxWriter.Write(index.Value.Offset);
                    idxWriter.Write(index.Value.DataSize);
                }
            }
        }

        /// <summary>
        /// Releases the memory-mapped files and view streams opened in query mode.
        /// Safe to call more than once and on an instance created for building.
        /// </summary>
        public void Dispose()
        {
            if (idixStream != null)
            {
                idixStream.Dispose();
                idixStream = null;
            }
            if (dictStream != null)
            {
                dictStream.Dispose();
                dictStream = null;
            }
            if (idxMMFReader != null)
            {
                idxMMFReader.Dispose();
                idxMMFReader = null;
            }
            if (dictMMFReader != null)
            {
                dictMMFReader.Dispose();
                dictMMFReader = null;
            }
        }

        /// <summary>
        /// Reads up to <paramref name="count"/> bytes into the start of the buffer,
        /// looping over short reads and stopping early at end of stream.
        /// </summary>
        /// <returns>The number of bytes actually read.</returns>
        static int ReadFully(Stream stream, byte[] buffer, int count)
        {
            int total = 0;
            while (total < count)
            {
                int read = stream.Read(buffer, total, count - total);
                if (read == 0)
                {
                    break;
                }
                total += read;
            }
            return total;
        }
    }

    /// <summary>
    /// A dictionary entry's explanation text.
    /// </summary>
    class DictWord
    {
        /// <summary>
        /// Explanation of the word.
        /// </summary>
        public string Description { get; set; }
    }

    /// <summary>
    /// One index entry: a word plus the location of its description data.
    /// </summary>
    class DictIndex
    {
        /// <summary>
        /// The word.
        /// </summary>
        public string Word { get; set; }

        /// <summary>
        /// Offset of the description data.
        /// </summary>
        public int Offset { get; set; }

        /// <summary>
        /// Size of the description data.
        /// </summary>
        public int DataSize { get; set; }
    }

    /// <summary>
    /// Dictionary-level information.
    /// </summary>
    class DictInfo
    {
        /// <summary>
        /// Name of the dictionary.
        /// </summary>
        public string BookName { get; set; }

        /// <summary>
        /// Number of words in the dictionary.
        /// </summary>
        public int WordCount { get; set; }

        /// <summary>
        /// Current offset.
        /// </summary>
        public int CurrentOffset { get; set; }
    }
}

转载于:https://my.oschina.net/wower/blog/37157

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值