C++词法分析(C#描述)

近来开始学习编译原理,词法分析是其中最基础的部分。在网上看到很多高手的作品,我也照着做了一个,如有错误请别介意。


using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;

namespace 词法分析成功改造3
{
    class Program
    {
        /// <summary>
        /// Reads a source file, echoes it to the console, runs the lexical analyser on it and
        /// prints the token listing, the error count and the collected error details.
        /// </summary>
        /// <param name="args">
        /// Optional command-line arguments. When present, <c>args[0]</c> is the path of the file
        /// to analyse; otherwise the original default <c>D:/test.txt</c> is used.
        /// </param>
        static void Main(string[] args)
        {
            string path = args.Length > 0 ? args[0] : "D:/test.txt";

            // Dispose the reader deterministically so the file handle is released
            // before the (potentially long) interactive wait at the end.
            string wholeText;
            using (StreamReader reader = new StreamReader(path))
            {
                wholeText = reader.ReadToEnd();
            }
            Console.WriteLine(wholeText + "\n\n");

            LexicalAnalysis manage = new LexicalAnalysis();
            Console.WriteLine(manage.Analysis(wholeText));

            Console.WriteLine("\n\n" + manage.ErrorNO());
            Console.WriteLine(LexicalAnalysis.AllWrongText);

            // Keep the console window open until a key is pressed.
            Console.Read();
        }

        #region 词法分析
        class LexicalAnalysis
        {
            /// <summary>
            /// Index of the character currently being examined in the input text.
            /// </summary>
            int i = 0;

            /// <summary>
            /// Scratch loop counter kept at instance level.
            /// </summary>
            int j = 0;

            /// <summary>
            /// Current line number; starts at 1 and is shared by all instances.
            /// </summary>
            public static int LineNo = 1;

            /// <summary>
            /// The lexeme most recently recognised by one of the Recognise* methods.
            /// </summary>
            string token = "";

            /// <summary>
            /// Accumulated token listing (one line per recognised token).
            /// </summary>
            string text2 = "";

            /// <summary>
            /// Error-count summary text.
            /// </summary>
            string text3 = "";

            /// <summary>
            /// Message of the last exception caught during analysis.
            /// </summary>
            string wrong = "";

            /// <summary>
            /// Number of errors found so far; shared by all instances.
            /// </summary>
            public static int errors = 0;

            /// <summary>
            /// Detailed error messages, one per line; shared by all instances.
            /// </summary>
            public static string AllWrongText = "";

            /// <summary>
            /// Occurrence counters for paired delimiters, used to detect unmatched pairs:
            /// flag1 = braces, flag2 = square brackets, flag3 = parentheses,
            /// flag4 = angle brackets, flag5 = single quotes, flag6 = double quotes.
            /// </summary>
            int flag1 = 0;
            int flag2 = 0;
            int flag3 = 0;
            int flag4 = 0;
            int flag5 = 0;
            int flag6 = 0;

            /// <summary>
            /// Keywords (partial list).
            /// </summary>
            string[] keyWords = new string[]
            {
                "auto", "double", "int", "struct", "break", "else", "long", "switch",
                "case", "enum", "register", "typedef", "char", "extern", "return", "union",
                "const", "float", "short", "unsigned", "continue", "for", "signed", "void",
                "default", "goto", "sizeof", "volatile", "do", "if", "while", "static"
            };

            /// <summary>
            /// Operators.
            /// </summary>
            string[] operatorWords = new string[]
            {
                "+", "-", "*", "/", "%", ">", "<", ">=", "<=", "==", "-=", "+=", "*=", "/=",
                "!=", "=", "%=", "&", "&&", "|", "||", "!", "++", "--", "~", "<<", ">>", "?:"
            };

            /// <summary>
            /// Delimiters.
            /// </summary>
            string[] boundWords = new string[]
            {
                "{", "}", "[", "]", ";", ",", ".", "(", ")", ":", "\"", "#", ">", "<", "\'"
            };

            /// <summary>
            /// Performs lexical analysis on <paramref name="str"/> and returns the token listing.
            /// Each recognised token produces one line of the form
            /// "line:  lexeme(padded to 12 columns)category".
            /// </summary>
            /// <remarks>
            /// The scan position, the listing and the line counter are fields of this class
            /// (the line counter is static), so they are not reset between calls.
            /// Scanning stops at the end of the string or at the first NUL character.
            /// </remarks>
            /// <param name="str">The text to analyse; null or empty yields an empty result.</param>
            /// <returns>The accumulated token listing.</returns>
            public string Analysis(string str)
            {
                if (string.IsNullOrEmpty(str))
                {
                    return "";
                }

                try
                {
                    // Bound the scan by the string length instead of relying on an
                    // IndexOutOfRangeException to leave the loop; a NUL character is still
                    // honoured as an explicit end marker.
                    while (i < str.Length && str[i] != '\0')
                    {
                        char current = str[i];

                        if (current == ' ' || current == '\t' || current == '\r')
                        {
                            // Insignificant whitespace.
                            i++;
                        }
                        else if (current == '\n')
                        {
                            LineNo++;
                            i++;
                        }
                        else if (IsLetter(current))
                        {
                            i = RecogniseLetter(str, i);
                            bool isKeyword = Array.IndexOf(keyWords, token) >= 0;
                            AppendToken(isKeyword ? "关键字" : "标识符");
                        }
                        else if (IsDigit(current))
                        {
                            i = RecogniseDigit(str, i);
                            AppendToken("常量");
                        }
                        else if (IsBound(current))
                        {
                            i = RecogniseBound(str, i);
                            AppendToken("界符");
                        }
                        else if (IsOperator(current))
                        {
                            i = RecogniseOperator(str, i);
                            AppendToken("运算符");
                        }
                        else
                        {
                            // Character belongs to no known class: report it and move on.
                            error(0);
                            i++;
                        }
                    }
                }
                catch (Exception e)
                {
                    // Best effort: keep whatever was recognised so far and remember why
                    // the scan stopped.
                    wrong = e.Message;
                }

                return text2;
            }

            /// <summary>
            /// Appends the current token to the listing under the given category and clears it.
            /// </summary>
            /// <param name="category">Category label written after the padded lexeme.</param>
            private void AppendToken(string category)
            {
                // PadRight(12) left-aligns the lexeme in a 12-column field; longer lexemes
                // are written unpadded.
                text2 = text2 + LineNo.ToString() + ":  " + token.PadRight(12) + category + "\r\n";
                token = "";
            }

            /// <summary>
            /// Tells whether a character is an ASCII letter (a-z or A-Z).
            /// </summary>
            /// <param name="ch">Character to classify.</param>
            /// <returns>True for an ASCII letter, false otherwise.</returns>
            public bool IsLetter(char ch)
            {
                bool isLower = 'a' <= ch && ch <= 'z';
                bool isUpper = 'A' <= ch && ch <= 'Z';
                return isLower || isUpper;
            }

            /// <summary>
            /// Tells whether a character is an ASCII decimal digit (0-9).
            /// </summary>
            /// <param name="ch">Character to classify.</param>
            /// <returns>True for a digit, false otherwise.</returns>
            public bool IsDigit(char ch)
            {
                return '0' <= ch && ch <= '9';
            }

            /// <summary>
            /// Tells whether a character is the underscore.
            /// </summary>
            /// <param name="ch">Character to classify.</param>
            /// <returns>True when <paramref name="ch"/> is '_', false otherwise.</returns>
            public bool IsUnderline(char ch)
            {
                return ch == '_';
            }

            /// <summary>
            /// Tells whether a character is the first character of an entry in the delimiter table.
            /// </summary>
            /// <param name="ch">Character to classify.</param>
            /// <returns>True when the character is a delimiter, false otherwise.</returns>
            public bool IsBound(char ch)
            {
                foreach (string bound in boundWords)
                {
                    if (bound[0] == ch)
                    {
                        return true;
                    }
                }

                return false;
            }

            /// <summary>
            /// Tells whether a character can start an operator, i.e. whether it equals the first
            /// character of any entry in the operator table.
            /// </summary>
            /// <param name="ch">Character to classify.</param>
            /// <returns>True when the character starts an operator, false otherwise.</returns>
            public bool IsOperator(char ch)
            {
                foreach (string op in operatorWords)
                {
                    if (op[0] == ch)
                    {
                        return true;
                    }
                }

                return false;
            }

            /// <summary>
            /// Recognises the operator that starts at position <paramref name="i"/> and stores it
            /// in the current-token field.
            /// </summary>
            /// <remarks>
            /// The longest match wins: when the two characters starting at <paramref name="i"/>
            /// form an entry of the operator table (for example "==", "&amp;&amp;" or "+="), both
            /// are consumed; otherwise only the single character is taken. The look-ahead is
            /// bounds-checked so an operator at the very end of the input is handled.
            /// </remarks>
            /// <param name="str">The text being analysed.</param>
            /// <param name="i">Index of the operator's first character.</param>
            /// <returns>Index of the first character after the operator.</returns>
            private int RecogniseOperator(string str, int i)
            {
                string recognised = str[i].ToString();

                if (i + 1 < str.Length)
                {
                    string pair = str.Substring(i, 2);
                    if (Array.IndexOf(operatorWords, pair) >= 0)
                    {
                        recognised = pair;
                    }
                }

                token = recognised;
                return i + recognised.Length;
            }

            /// <summary>
            /// Recognises an identifier or keyword starting at position <paramref name="i"/>:
            /// a letter followed by any number of letters, digits or underscores. The lexeme is
            /// stored in the current-token field.
            /// </summary>
            /// <remarks>
            /// The scan is bounds-checked, so a word that runs to the end of the input is
            /// returned intact. If the character at <paramref name="i"/> is not a letter, the
            /// token is set to the empty string and <paramref name="i"/> is returned unchanged.
            /// </remarks>
            /// <param name="str">The text being analysed.</param>
            /// <param name="i">Index of the word's first character.</param>
            /// <returns>Index of the first character after the word.</returns>
            public int RecogniseLetter(string str, int i)
            {
                int start = i;

                if (i < str.Length && IsLetter(str[i]))
                {
                    i++;
                    while (i < str.Length
                           && (IsLetter(str[i]) || IsDigit(str[i]) || IsUnderline(str[i])))
                    {
                        i++;
                    }
                }

                token = str.Substring(start, i - start);
                return i;
            }

            /// <summary>
            /// Recognises a numeric constant starting at position <paramref name="i"/>: one or
            /// more digits, optionally followed by a single decimal point and more digits. The
            /// lexeme is stored in the current-token field.
            /// </summary>
            /// <remarks>
            /// A decimal point is only consumed when a digit follows it, and at most one decimal
            /// point is accepted per constant. All look-ahead is bounds-checked, so a number at
            /// the end of the input is returned intact. If the character at
            /// <paramref name="i"/> is not a digit, the token is set to the empty string and
            /// <paramref name="i"/> is returned unchanged.
            /// </remarks>
            /// <param name="str">The text being analysed.</param>
            /// <param name="i">Index of the constant's first character.</param>
            /// <returns>Index of the first character after the constant.</returns>
            public int RecogniseDigit(string str, int i)
            {
                int start = i;
                bool seenPoint = false;

                while (i < str.Length)
                {
                    if (IsDigit(str[i]))
                    {
                        i++;
                    }
                    else if (!seenPoint
                             && i > start
                             && str[i] == '.'
                             && i + 1 < str.Length
                             && IsDigit(str[i + 1]))
                    {
                        // Consume the point together with the digit that validates it.
                        seenPoint = true;
                        i += 2;
                    }
                    else
                    {
                        break;
                    }
                }

                token = str.Substring(start, i - start);
                return i;
            }

            /// <summary>
            /// Recognises the single-character delimiter at position <paramref name="i"/>, stores
            /// it in the current-token field and updates the matching pair counter.
            /// </summary>
            /// <param name="str">The text being analysed.</param>
            /// <param name="i">Index of the delimiter.</param>
            /// <returns>
            /// Index of the next character, or <paramref name="i"/> unchanged when the character
            /// is not in the delimiter table (the token is then empty).
            /// </returns>
            public int RecogniseBound(string str, int i)
            {
                char candidate = str[i];
                string matched = "";

                if (IsBound(candidate))
                {
                    matched = candidate.ToString();
                    i++;
                }

                token = matched;

                // Count occurrences of paired delimiters; an odd total is later reported
                // as an unmatched pair.
                switch (matched)
                {
                    case "{":
                    case "}":
                        flag1++;
                        break;

                    case "[":
                    case "]":
                        flag2++;
                        break;

                    case "(":
                    case ")":
                        flag3++;
                        break;

                    case "<":
                    case ">":
                        flag4++;
                        break;

                    case "\'":
                        flag5++;
                        break;

                    case "\"":
                        flag6++;
                        break;
                }

                return i;
            }

            /// <summary>
            /// Maps a lexeme to its numeric code within the combined keyword / operator /
            /// delimiter numbering.
            /// </summary>
            /// <remarks>
            /// Keywords are numbered from 0, operators follow directly after the keywords and
            /// delimiters follow after the operators. The offsets are derived from the table
            /// lengths so they stay correct if a table is edited. A lexeme that is not found
            /// yields 0, which is also the code of the first keyword.
            /// </remarks>
            /// <param name="str">The lexeme to look up.</param>
            /// <param name="k">Table selector: 1 = keywords, 2 = operators, 3 = delimiters.</param>
            /// <returns>The lexeme's code, or 0 when it is not found or the selector is unknown.</returns>
            public int GetetToken(string str, int k)
            {
                int index;

                switch (k)
                {
                    case 1: // keywords
                        index = Array.IndexOf(keyWords, str);
                        if (index >= 0)
                        {
                            return index;
                        }
                        break;

                    case 2: // operators, numbered after the keywords
                        index = Array.IndexOf(operatorWords, str);
                        if (index >= 0)
                        {
                            return keyWords.Length + index;
                        }
                        break;

                    case 3: // delimiters, numbered after keywords and operators
                        index = Array.IndexOf(boundWords, str);
                        if (index >= 0)
                        {
                            return keyWords.Length + operatorWords.Length + index;
                        }
                        break;
                }

                return 0;
            }

            /// <summary>
            /// Runs the unmatched-delimiter check and builds the error-count summary.
            /// </summary>
            /// <returns>The summary text: the error count followed by "   errors".</returns>
            public string ErrorNO()
            {
                // The delimiter check may add to the error count, so run it first.
                error(1);

                string summary = errors.ToString() + "   errors";
                text3 = summary;
                return summary;
            }

            /// <summary>
            /// Records errors in the shared error log and error counter.
            /// </summary>
            /// <param name="k">
            /// Error kind: 0 records an illegal character on the current line; 1 checks every
            /// paired-delimiter counter and records one error per counter with an odd value.
            /// Any other value does nothing.
            /// </param>
            public void error(int k)
            {
                if (k == 0)
                {
                    // Illegal character in the input.
                    AllWrongText = AllWrongText + LineNo.ToString() + ": " + "非法字符" + "\r\n";
                    errors++;
                    return;
                }

                if (k != 1)
                {
                    return;
                }

                // Unmatched delimiters: counters and their messages, in reporting order.
                int[] counters = { flag1, flag2, flag3, flag4, flag5, flag6 };
                string[] messages =
                {
                    "{ 不匹配", "[ 不匹配", "( 不匹配", "< 不匹配", "' 不匹配", " \" 不匹配"
                };

                for (int n = 0; n < counters.Length; n++)
                {
                    if (counters[n] % 2 != 0)
                    {
                        AllWrongText = AllWrongText + messages[n] + "\r\n";
                        errors++;
                    }
                }
            }

        }
        #endregion





    }
}


  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值