//
// Fundamentals of Compiler
//project: Word analysis
//written by: kejie 03 computer science and technology 086
//create time: 2006.4.5
//modified: 2006.4.10
//file name: Analysis.h
//note: This file is the header file for Analysis.cpp; it contains the declaration of the LEX class
//Copyright University of electronic science and technology of China, ZhongShan Institute.
//
#include <stdlib.h>
#include <ctype.h>
#include "BSTree.h"
// Size, in chars, of the LEX::strToken array that holds the token being assembled.
#define MAXSTRTAKEN 20
// Size, in chars, of the LEX::buf array that holds the source line being scanned.
#define MAXBUF 50
/**
 * LEX - line-oriented lexical analyser.
 *
 * Reads the file named by FilePath one line at a time into buf, walks the
 * line character by character and prints every recognised token to cout.
 */
class LEX
{
public:
    /// Creates an analyser with no source path set.
    LEX();
    /// Creates an analyser for the file at `path`; the pointer is stored, not copied.
    LEX(char * path);

    /// Opens the source file and closes it again.
    void LoadSource();
    /// Prints the source file to cout, one line at a time.
    void PrintSource();
    /// Runs the analysis over the source file and returns a status code.
    int Analysis();
    /// Error-handling hook; returns an error code.
    int ProError(int ErrorCode );

private:
    // --- character-level helpers ---------------------------------------
    /// Returns the character at bufPos and advances bufPos.
    char GetChar();
    /// Tells whether ReadChar is NUL, space, CR, LF or TAB.
    bool GetBc( char ReadChar );
    /// Appends ch to the token being built in strToken.
    void Concat();
    /// Steps bufPos back by one character and clears ch.
    void Retract();

    // --- table access --------------------------------------------------
    /// Looks strToken up in the keyword table and returns its code.
    int ResertKeyWord();
    /// Looks strToken up in the operator table and returns its code.
    int ResertOperator();
    /// Meant to insert strToken into the symbol table and return a pointer
    /// to the entry; the implementation is currently a stub.
    int * InsertId();
    /// Meant to insert strToken into the constant table and return a pointer
    /// to the entry; the implementation is currently a stub.
    int * InsertConst();

    // --- state resets --------------------------------------------------
    /// Clears the line buffer, its position and ch.
    void ReSetBuf();
    /// Clears the token buffer and its position.
    void ReSetStrToken();

    // --- token recognisers ---------------------------------------------
    /// Recognises a run of digits.
    void DigitWordAnalysis();
    /// Recognises an identifier or keyword.
    void StringAnalysis();
    /// Recognises an operator or a double-quoted string.
    void OperatorAnalysis();
    /// Intended to save the analysis result.
    void SaveAnalysisResult();

private:
    char strToken[MAXSTRTAKEN];   ///< Characters of the token being assembled.
    int strPos;                   ///< Next write position in strToken.
    char buf[MAXBUF];             ///< Line read from the source file.
    int bufPos;                   ///< Next read position in buf.
    char ch;                      ///< Most recently read source character.
    char * FilePath;              ///< Path of the source file.
    ifstream Infile;              ///< Stream used to read the source file.
};
//
// Fundamentals of Compiler
//project: Word analysis
//written by: kejie 03 computer science and technology 086
//create time: 2006.4.8
//modified: 2006.4.15
//file name: Analysis.cpp
//note: Implementation of the word analysis (lexer) class
//Copyright University of electronic science and technology of China, ZhongShan Institute.
//
#include "Analysis.h"
// Name of a source file. Nothing in the code shown here reads this variable.
// NOTE(review): binding a string literal to a non-const char* is rejected by
// C++11 and later; confirm no other file depends on the type before changing it.
char * SourcePath = "source.txt";
// Lookup tables defined in another translation unit. LEX queries them through
// GetWordCode() to obtain the code of an operator or keyword.
extern BSTree OperatorTable;
extern BSTree KeyTable;
// Default constructor: zeroes both buffers and their positions and leaves the
// source path unset. Note that ch starts as the character '0', not NUL.
LEX::LEX()
    : strPos(0),
      bufPos(0),
      ch('0'),
      FilePath(NULL)
{
    memset(strToken, 0, sizeof(strToken));
    memset(buf, 0, sizeof(buf));
}
// Constructs an analyser for the file at `path`. Only the pointer is stored,
// so the string it points to must outlive this object. Buffers and positions
// are zeroed; ch starts as the character '0', not NUL.
LEX::LEX(char * path)
    : strPos(0),
      bufPos(0),
      ch('0'),
      FilePath(path)
{
    memset(strToken, 0, sizeof(strToken));
    memset(buf, 0, sizeof(buf));
}
// Opens the file named by FilePath and closes it again straight away.
// Nothing is read from the file here.
void LEX::LoadSource() {
    Infile.open(FilePath);
    Infile.close();
}
//打印读入的源程序
void LEX::PrintSource()
{
Infile.open(FilePath);
//读入源程序,以行读入,忽略空格
while(Infile.getline(buf,' '))
cout<<buf<<endl;
Infile.close();
}
// Resets the line-reading state: clears ch, rewinds bufPos and zeroes buf.
void LEX::ReSetBuf()
{
    ch = 0;
    bufPos = 0;
    memset(buf, 0, sizeof(buf));
}
// Resets the token buffer: rewinds strPos and zeroes strToken.
void LEX::ReSetStrToken()
{
    strPos = 0;
    memset(strToken, 0, sizeof(strToken));
}
// Returns the character at the current buffer position and then advances
// the position by one. No bounds check is performed.
char LEX::GetChar()
{
    const char current = buf[bufPos];
    ++bufPos;
    return current;
}
// Reports whether ReadChar is one of the blank characters the scanner skips:
// NUL, space, carriage return, line feed or horizontal tab.
bool LEX::GetBc( char ReadChar )
{
    switch ( ReadChar )
    {
    case 0x00:  // NUL
    case 0x20:  // space
    case 0x0d:  // carriage return
    case 0x0a:  // line feed
    case 0x09:  // horizontal tab
        return true;
    default:
        return false;
    }
}
//将读入的ch加入到已构成的单词串中
void LEX::Concat()
{
strToken[strPos++] = ch;
}
// Looks the current token (strToken) up in KeyTable and returns the code that
// GetWordCode() reports. The caller in this file treats a negative result as
// "not a keyword".
int LEX::ResertKeyWord()
{
    const int keywordCode = KeyTable.GetWordCode(strToken);
    return keywordCode;
}
// Looks the current token (strToken) up in OperatorTable and returns the code
// that GetWordCode() reports. The caller in this file treats a negative result
// as "not an operator".
int LEX::ResertOperator()
{
    const int operatorCode = OperatorTable.GetWordCode(strToken);
    return operatorCode;
}
// Un-reads one character: clears ch and moves the buffer position back by one.
void LEX::Retract()
{
    ch = 0;
    bufPos -= 1;
}
// Placeholder for inserting strToken into the symbol table.
// Nothing is inserted yet; a null pointer is always returned.
int * LEX::InsertId()
{
    return NULL;
}
// Placeholder for inserting strToken into the constant table.
// Nothing is inserted yet; a null pointer is always returned.
int * LEX::InsertConst()
{
    return NULL;
}
// Recognises a run of digits.
//
// On entry strToken already holds the first digit (Analysis() appends it
// before dispatching). Further digits are appended, the number is printed as
// "< digits , - >", the first non-digit is pushed back and the token buffer
// is cleared.
void LEX::DigitWordAnalysis()
{
    for ( ;; )
    {
        ch = GetChar();
        // isdigit() has undefined behaviour for negative arguments other than
        // EOF, and plain char may be negative for non-ASCII bytes, so the
        // value is converted to unsigned char first.
        if ( !isdigit(static_cast<unsigned char>(ch)) )
            break;
        Concat();
    }

    cout<<"< "<<strToken<<" , - >"<<endl;

    Retract();          // give the terminating character back to the caller
    ReSetStrToken();
}
// Recognises an identifier or keyword.
//
// On entry strToken already holds the first letter (Analysis() appends it
// before dispatching). Letters and digits are appended while they last. A
// word containing a digit is never looked up as a keyword. Output is
// "< word , code >" with the code in hexadecimal when the keyword table
// returns a non-negative code, and "< word , - >" otherwise. The `hex`
// manipulator stays in effect on cout afterwards. Finally the terminating
// character is pushed back and the token buffer is cleared.
void LEX::StringAnalysis()
{
    bool isKeyWorld = true;  // becomes false once a digit is seen

    for ( ;; )
    {
        ch = GetChar();
        // The <ctype.h> classifiers have undefined behaviour for negative
        // arguments other than EOF, and plain char may be negative for
        // non-ASCII bytes, so classify the unsigned value.
        const unsigned char current = static_cast<unsigned char>(ch);
        if ( isdigit(current) )
            isKeyWorld = false;
        else if ( !isalpha(current) )
            break;
        Concat();
    }

    // Only pure-letter words are candidates for the keyword table.
    const int Code = isKeyWorld ? ResertKeyWord() : -1;
    if ( Code >= 0x00 )
        cout<<"< "<<strToken<<" , "<<hex<<Code<<" >"<<endl;
    else
        cout<<"< "<<strToken<<" , - >"<<endl;

    Retract();          // give the terminating character back to the caller
    ReSetStrToken();
}
//操作符识别
void LEX::OperatorAnalysis()
{
//如果读进时 " 时,则直接输出到下一个 " 为止
if ( ch == '"')
{
strPos = 0;
while( (ch = GetChar()) != '"')
strToken[strPos++] = ch;
cout<<"< "<<strToken<<" , - >"<<endl;
ReSetStrToken();
return;
}
//再读入一个,看看是否是二元操作符,不是的话回退一格
ch = GetChar();
if ( !isdigit(ch) && !isalpha(ch) && ResertOperator() )
{
Concat();
if ( ResertOperator() < 0 )
{
Retract();
strToken[--strPos] = 0;
}
}
else
Retract();
cout<<"< "<<strToken<<" , "<<hex<<ResertOperator()<<" >"<<endl;
//识别完一个后清空缓冲
ReSetStrToken();
}
//开始分析
int LEX::Analysis()
{
int buflen = 0;
//int bufindex;
Infile.open(FilePath);
//读入源程序,开始分析
//1.逐行读入程序,放在缓存buf中,分析完一行后清空buf,再读入
while(Infile.getline(buf,' '))
{
//先判断读入的一行是不是注释,是的话读入下一行
if ( buf[0] == '/' && buf[1] == '/')
continue;
//2.分析缓存buf,将字符串加入strToken中,//ch保存为读入的单个字符
buflen = strlen(buf);
for( bufPos=0; bufPos<buflen; )
{
ch = GetChar();
Concat();
if ( isdigit(ch) ) //识别数字串
{
DigitWordAnalysis();
}
else if ( isalpha(ch) ) //识别字符串/关键字
{
StringAnalysis();
}
else if ( GetBc( ch ) ) //判断读入符是否为空格/换行/回车
{
ReSetStrToken();
}
else //识别操作符
{
OperatorAnalysis();
}
}
ReSetBuf();
ReSetStrToken();
}
Infile.close();
return 0;
}
// Placeholder for saving the analysis result; currently does nothing.
// NOTE(review): this is a free function. Class LEX declares a private member
// with the same name that this definition does not provide - confirm which
// one was intended.
void SaveAnalysisResult() {}
// Error-handling hook. The error code passed in is currently ignored and
// 0 is always returned.
int LEX::ProError( int ErrorCode )
{
    static_cast<void>(ErrorCode);  // not used yet
    return 0;
}