词法分析器实验报告

词法分析器实验报告

1.实验平台:

visual studio 2010

2.实验目的

 设计、编制、调试一个词法分析子程序-识别单词,加深对词法分析原理的理解。

3.数字状态转换图:

4.本程序自行规定:

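(1)关键字"begin","end","if","then","else","while","write","read",

"do","call","const","char","until","procedure","repeat"

(2)运算符:"+","-","*","/","="

(3)界符:"{","}","[","]",";",",",".","(",")",":"

注:下文源代码实际使用的是另一组表——关键字见 keyStr 数组(cout、cin、if、else、while 等共 26 个),运算符为 > < + - * / = !,界符为 { } ( ) : ; . # ,(不含 [ 和 ])。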

(4)其他标记:如标识符,即以字母开头、由字母、数字或下划线组成的字符串。
(5)空格、回车、换行符跳过。

5.程序源代码:

源文件主要放在三个文件中:stdafx.h 主要存放头文件包含和函数声明;stdafx.cpp 主要存放函数的定义;Compilertest.cpp 存放主函数的定义。

// stdafx.h : 标准系统包含文件的包含文件,

// 或是经常使用但不常更改的

// 特定于项目的包含文件

#pragma once

#include "targetver.h"

#include <stdio.h>

#include <tchar.h>

#include <iostream>

#include <fstream>

#include <string>

#include "compiler.h"

using namespace std;

int const keyLength=26;                     //number of entries in the keyword array

int const OPLENGTH1=8;                      //number of entries in the operator array

int const JLENGTH=9;                        //number of entries in the delimiter array

int const BFLENGTH=640;                     //size of each of the two input buffers

//Function declarations

char firstRead();                           //initial read: fills the first buffer and returns the first character

char getChar();                             //returns the next character from the input buffers

bool isLetter(char C);                      //true if C is a letter ('a'-'z' or 'A'-'Z')

bool isDigit(char C);                       //true if C is a decimal digit

bool isUnderLine(char C);                   //true if C is an underscore

bool isKeyLetter(string str);               //true if str is one of the keywords

bool isJieFu(char C);                       //true if C is a delimiter

bool isOperator(char C);                    //true if C is an operator character

void back();                                //moves the forward pointer back by one position

void myPrint(string s1,string s2);          //writes the pair "< s1 , s2 >" to the console and to the output file

char ciFaFenXi(char now);                   //lexical analysis: reports one token, returns the character that follows it

void  readFBiao( );                         //prints the contents of the symbol table

//true if str is already stored in row 0 (keywords) of the symbol table
bool inGuanJianBiao(string fuhaobiao[2][600],string str);

//true if str is already stored in row 1 (variables) of the symbol table
bool inBainLiangBiao(string fuhaobiao[2][600],string str);

stdafx.cpp :

// stdafx.cpp : 只包括标准包含文件的源文件

// Compilertest.pch 将作为预编译头

// stdafx.obj 将包含预编译类型信息

#include "stdafx.h"

char buffer1[BFLENGTH];     //first of the two input buffers

char buffer2[BFLENGTH];     //second of the two input buffers

//Number of entries currently stored in each row of the symbol table
//(fcount1 counts row 0 / keywords, fcount2 counts row 1 / variables)

int fcount1=0;

int fcount2=0;

//Keyword table (keyLength entries)

string keyStr[keyLength]={"cout","cin","this","if","else","while","true","false","iostream","new","main","infile","std","endl",

    "using","namespace","char","int","double","float","bool","return","include","public","class","void"};

//Character tables

char myOperator1[OPLENGTH1]={'>','<','+','-','*','/','=','!'};      //operator characters

char jiefu[JLENGTH]={'{','}','(',')',':',';','.','#',','};          //delimiter characters

//Input and output file streams, both opened in binary mode

ifstream infile("G://a.txt",ios::binary);

ofstream outfile("G://b.txt",ios::binary);

char now;                                                           //current character

string token="";                                                    //accumulates the characters of the token being scanned

char *startP,*forwardP;                                             //start pointer and forward (read) pointer into the buffers

string fuhaobiao[2][600];                                           //symbol table: row 0 holds keywords seen, row 1 holds variables seen

//Returns true when C is an ASCII letter, i.e. in 'a'..'z' or 'A'..'Z'.
bool isLetter(char C){
    const bool lowerCase = ('a'<=C && C<='z');
    const bool upperCase = ('A'<=C && C<='Z');
    return lowerCase || upperCase;
}

//Returns true when C is a decimal digit ('0'..'9').
bool isDigit(char C){
    return '0'<=C && C<='9';
}

//判断是否为界符

bool isJieFu(char C){

    

    for (int i=0;i<JLENGTH;i++){

        if (C==jiefu[i]){

            return true;

        }else 

            continue;

    }

    return false;

}

//判断是否为运算符

bool isOperator(char C){

    for (int i=0;i<OPLENGTH1;i++){

        if (C==myOperator1[i]){

            return true;

        }else 

            continue;

    }

    return false;

}

//Returns true when C is the underscore character.
bool isUnderLine(char C){
    return C=='_';
}

//判断是否为系统关键字

bool isKeyLetter(string str){

    for(int i=0;i<keyLength;i++){

        if (str==keyStr[i]){

            return true;

        }

    }

    return false;

}

//自定义输出函数

void myPrint(string s1,string s2){

        if (!outfile){

    cout <<"文件打开失败"<<endl;

    exit(1);

    }

        

    cout<<"< "<<s1<<" , "<<s2<<" >\r\n"<<endl;    

    outfile<<"< "<<s1<<" , "<<s2<<" >\r"<<endl;

}

//定义遍历符号表函数

void readFBiao(  ){

    for (int i=0;i<2;i++){

        if (i==0){       //遍历关键字符号表

            for (int j=0;j<fcount1;j++){

            cout<<fuhaobiao[i][j]<<endl;

            }

        }

        //变量符号表

        if (i==1){      //遍历变量符号表

            for (int j=0;j<fcount2;j++){

                if (j==0){

                cout<<"输出变量表中的内容!"<<endl;

                }

            cout<<fuhaobiao[i][j]<<endl;

            }

        }   

    }

}

//定义查看是否存在符号表中的函数

bool inGuanJianBiao(string fuhaobiao[2][600],string str){   

            for (int j=0;j<fcount1;j++){

                if (str==fuhaobiao[0][j]){

                return true;

                }

            }

            return false;

}

bool inBainLiangBiao(string fuhaobiao[2][600],string str){  

            for (int j=0;j<fcount2;j++){

                if (str==fuhaobiao[1][j]){

                return true;

                }

            }

            return false;

}

char firstRead(){

buffer1[BFLENGTH-1]=EOF;

    buffer2[BFLENGTH-1]=EOF;

    if (!infile){

    cout <<"文件打开失败"<<endl;

    exit(1);

    }

    forwardP=buffer1;//让指针指向第一个缓冲区

    //把字符读入buffer1

    for(int i=0;i<BFLENGTH-2;i++){

        if(infile.get(buffer1[i])){

        }else{

            infile.close();

            buffer1[i]=EOF;

            break;

        }   

    }

    now= getChar();

    return now;

}

//定义词法分析函数

 char ciFaFenXi(char now){

    //循环获取

    while(1){

        while (now==' '){now=getChar(); }

        if (now=='\r'){                     //判断换行符

            now=getChar();

            if (now=='\n'){now=getChar();}          

            continue;

        }else if(now==EOF) {return EOF ;

        }else if (isLetter(now)){          //判断字符串

            token+=now;

            now=getChar();

            while(isLetter(now)||isDigit(now)||isUnderLine(now)){

                token+=now;

                now=getChar();

            }   

            if (isKeyLetter(token)){        //判断关键字类型

                myPrint(token,"关键字");

                if (!inGuanJianBiao( fuhaobiao,token)){

                fuhaobiao[0][fcount1++]=token;    ///加入符号表中

                }

            }else{

                myPrint(token,"变量");

                if (!inBainLiangBiao( fuhaobiao,token)){

                    fuhaobiao[1][fcount2++]=token; 

                }

            }

            token="";

            return now;

        }else if (isDigit(now)){            //判断整数

            token+=now;

            now=getChar();

            while(isDigit(now)){

                token+=now;

                now=getChar();

            }

            if (now=='.'){                  //判断小数

                token+=now;

                now=getChar();

                while(isDigit(now)){

                    token+=now;

                    now=getChar();

                }

                myPrint(token,"小数");

                token="";

                return now;

            }

            myPrint(token,"整数");

            token="";

           return now;

        }else if (isJieFu(now)){             //判断界符

            string result;

            myPrint(result.assign(1,now),"界符");

            now=getChar();

            return now;

        }else if (isOperator(now)){         //判断运算符

            token+=now;

            now=getChar();

            if (now=='='){

                token+=now;

                now=getChar();

                myPrint(token,"双目运算符");

                token="";

                return now;

            }

            if(token!="!")

            myPrint(token,"单目运算符");

            else myPrint(token,"其他");

            token="";

            

            return now;

        }else {

            string result;

            myPrint(result.assign(1,now),"其他");

            now=getChar();

            return now;

        }

        return now;

    }

    infile.close();

}

//自定义getChar()函数,运用了双缓冲,有两个字符数组作为缓冲区

char getChar(){

    char next;//定义储存下一个读入的字符    

    if(*forwardP==EOF){//如果到缓冲区末尾,但不知是哪个缓冲区尾

        if (forwardP==buffer1+BFLENGTH-1){//如果到了buffer1的末尾

            //读入buffer2

            for(int i=0;i<BFLENGTH-1;i++){

                if(infile.get(buffer2[i])){

    

                }else{

                    infile.close();

                    buffer2[i]=EOF;

                    break;

                    //exit(1);

                }

            }

            forwardP=buffer2;

            getChar();

        

        }else if (forwardP==buffer2+BFLENGTH-1){//如果到了buffer2的末尾

            //读入buffer1

            for(int i=0;i<BFLENGTH-1;i++){

                if(infile.get(buffer1[i])){

                //cout<<buffer1[i]<<"";

                }else{

                    //cout<<endl<<"The file is end!!!!!!!!!";

                    infile.close();

                    buffer1[i]=EOF;

                    break;

                    //exit(1);

                }

            }

        

            forwardP=buffer1;

            getChar();

        

        }else{

            infile.close();     

            return EOF;

        //exit(1);

        }

    }else{

        if (*forwardP==EOF){

            exit(1);

        }

        next=*forwardP;//把他付给next

        //cout<<next;

        forwardP+=1;//向下走一个

        return next;

    }

}

//定义后退一个函数

void back(){

    if(forwardP==buffer1){//如果当前指针指向buffer1的开头

        forwardP=buffer2+BFLENGTH-2;//转而指向buffer2的末尾

    }else if(forwardP==buffer2){

        forwardP=buffer1+BFLENGTH-2;//转而指向buffer1的末尾

    }else {

        forwardP-=1;

    }   }

Compilertest.cpp主函数:

// Compilertest.cpp : 定义控制台应用程序的入口点。

//

//@author lixingle

//词法分析器小程序

#include "stdafx.h"

using namespace std;

char nextC;

int main(){

 nextC= firstRead();

 //调用词法分析函数  

  while(nextC!=EOF)

  nextC=ciFaFenXi(nextC);

 //遍历符号表

  cout <<"遍历符号表开始!"<<endl;

  readFBiao( );

  return 0;

}

6.测试结果:


转载于:https://www.cnblogs.com/lixingle/archive/2013/03/29/3312993.html

  • 2
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
设计思想 (1)程序主体结构部分: 说明部分 %% 规则部分 %% 辅助程序部分 (2)主体结构的说明 在这里说明部分告诉我们使用的LETTER,DIGIT, IDENT(标识符,通常定义为字母开头的字母数字串)和STR(字符串常量,通常定义为双引号括起来的一串字符)是什么意思.这部分也可以包含一些初始化代码.例如用#include来使用标准的头文件和前向说明(forward ,references).这些代码应该再标记"%{"和"%}"之间;规则部分>可以包括任何你想用来分析的代码;我们这里包括了忽略所有注释中字符的功能,传送ID名称和字符串常量内容到主调函数和main函数的功能. (3)实现原理 程序中先判断这个句语句中每个单元为关键字、常数、运算符、界符,对与不同的单词符号给出不同编码形式的编码,用以区分之。 PL/0语言的EBNF表示 <常量定义>::=<标识符>=<无符号整数>; <标识符>::=<字母>={<字母>|<数字>}; <加法运算符>::=+|- <乘法运算符>::=*|/ <关系运算符>::==|#|<|<=|>|>= <字母>::=a|b|…|X|Y|Z <数字>::=0|1|2|…|8|9 三:设计过程 1. 关键字:void,main,if,then,break,int,Char,float,include,for,while,printfscanf 并为小写。 2."+”;”-”;”*”;”/”;”:=“;”:”;”<“;”<=“;”>“;”>=“;”<>“;”=“;”(“;”)”;”;”;”#”为运算符。 3. 其他标记 如字符串,表示以字母开头的标识符。 4. 空格符跳过。 5. 各符号对应种别码 关键字分别对应1-13 运算符分别对应401-418,501-513。 字符串对应100 常量对应200 结束符# 四:举例说明 目标:实现对常量的判别 代码: digit [0-9] letter [A-Za-z] other_char [!-@\[-~] id ({letter}|[_])({letter}|{digit}|[_])* string {({letter}|{digit}|{other_char})+} int_num {digit}+ %% [ |\t|\n]+ "auto"|"double"|"int"|"struct"|"break"|"else"|"long"|"switch"|"case"|"enum"|"register"|"typedef"|"char"|"extern"|"return"|"union"|"const"|"float"|"short"|"unsigned"|"continue"|"for"|"signed"|"void"|"default"|"goto"|"sizeof"|"do"|"if"|"static"|"while"|"main" {Upper(yytext,yyleng);printf("%s,NULL\n",yytext);} \"([!-~])*\" {printf("CONST_string,%s\n",yytext);} -?{int_num}[.]{int_num}?([E][+|-]?{int_num})? 
{printf("CONST_real,%s\n",yytext);} "0x"?{int_num} {printf("CONST_int,%s\n",yytext);} ","|";"|"("|")"|"{"|"}"|"["|"]"|"->"|"."|"!"|"~"|"++"|"--"|"*"|"&"|"sizeof"|"/"|"%"|"+"|"-"|">"|"<"|">="|"<="|"=="|"!="|"&"|"^"|"|"|"&"|"||"|"+="|"-="|"*="|"/="|"%="|">>="|"<<="|"&="|"^="|"|="|"=" {printf("%s,NULL\n",yytext);} {id} {printf("ID,%s\n",yytext);} {digit}({letter})+ {printf("error1:%s\n",yytext);} %% #include <ctype.h> Upper(char *s,int l) { int i; for(i=0;i<l;i++) { s[i]=toupper(s[i]); } } yywrap() { return 1; } 五:DFA 六:数据测试 七:心得体会 其实匹配并不困难,主要是C++知识要求相对较高,只要把握住指针就好了。 附源程序: #include<iostream.h> #include<stdio.h> #include<stdlib.h> #include<string.h> int i,j,k,flag,number,status; /*status which is use to judge the string is keywords or not!*/ char ch; char words[10] = {" "}; char program[500]; int Scan(char program[]) { char *keywords[13] = {"void","main","if","then","break","int", "char","float","include","for","while","printf", "scanf"}; number = 0; status = 0; j = 0; ch = program[i++]; /* To handle the lettle space ands tab*/ /*handle letters*/ if ((ch >= 'a') && (ch <= 'z' )) { while ((ch >= 'a') && (ch <= 'z' )) { words[j++]=ch; ch=program[i++]; } i--; words[j++] = '\0'; for (k = 0; k < 13; k++) if (strcmp (words,keywords[k]) == 0) switch(k) { case 0:{ flag = 1; status = 1; break; } case 1:{ flag = 2; status = 1; break; } case 2:{ flag = 3; status = 1; break; } case 3:{ flag = 4; status = 1; break; } case 4:{ flag = 5; status = 1; break; } case 5:{ flag = 6; status = 1; break; } case 6:{ flag = 7; status = 1; break; } case 7:{ flag = 8; status = 1; break; } case 8:{ flag = 9; status = 1; break; } case 9:{ flag = 10; status = 1; break; } case 10:{ flag = 11; status = 1; break; } case 11:{ flag = 12; status = 1; break; } case 12:{ flag = 13; status = 1; break; } } if (status == 0) { flag = 100; } } /*handle digits*/ else if ((ch >= '0') && (ch <= '9')) { number = 0; while ((ch >= '0' ) && (ch <= '9' )) { number = number*10+(ch-'0'); ch = program[i++]; } flag = 200; 
i--; } /*opereation and edge handle*/ else switch (ch) { case '=':{ if (ch == '=') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 401; } else { i--; flag = 402; } break; } case'>':{ if (ch == '>') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 403; } else { i--; flag = 404; } break; } case'<':{ if (ch == '<') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 405; } else { i--; flag = 406; } break; } case'!':{ if (ch == '!') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 407; } else { i--; flag = 408; } break; } case'+':{ if (ch == '+') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 409; } else if (ch == '+') { words[j++] = ch; words[j] = '\0'; flag = 410; } else { i--; flag = 411; } break; } case'-':{ if (ch == '-') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 412; } else if( ch == '-') { words[j++] = ch; words[j] = '\0'; flag = 413; } else { i--; flag = 414; } break; } case'*':{ if (ch == '*') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 415; } else { i--; flag = 416; } break; } case'/':{ if (ch == '/') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 417; } else { i--; flag = 418; } break; } case';':{ words[j] = ch; words[j+1] = '\0'; flag = 501; break; } case'(':{ words[j] = ch; words[j+1] = '\0'; flag = 502; break; } case')':{ words[j] = ch; words[j+1] = '\0'; flag = 503; break; } case'[':{ words[j] = ch; words[j+1] = '\0'; flag = 504; break; } case']':{ words[j] = ch; words[j+1] = '\0'; flag = 505; break; } case'{':{ words[j] = ch; words[j+1] = '\0'; flag = 
506; break; } case'}':{ words[j] = ch; words[j+1] = '\0'; flag = 507; break; } case':':{ words[j] = ch; words[j+1] = '\0'; flag = 508; break; } case'"':{ words[j] = ch; words[j+1] = '\0'; flag = 509; break; } case'%':{ if (ch == '%') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 510; } else { i--; flag = 511; } break; } case',':{ words[j] = ch; words[j+1] = '\0'; flag = 512; break; } case'#':{ words[j] = ch; words[j+1] = '\0'; flag = 513; break; } case'@':{ words[j] = '#'; flag = 0; break; } default:{ flag = -1; break; } } return flag; } main() { i=0; printf("please input a program end with @"); do { ch = getchar(); program[i++] = ch; }while(ch != '@'); i = 0; do{ flag = Scan(program); if (flag == 200) { printf("(%2d,%4d)",flag,number); } else if (flag == -1) { printf("(%d,error)",flag); } else { printf("(%2d,%4s)",flag,words); } }while (flag != 0); system("pause"); }
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值