LCC编译器的源程序分析(5)行号同步与类型初始化

最新推荐文章于 2024-05-22 20:31:44 发布

anjichan4261

最新推荐文章于 2024-05-22 20:31:44 发布

阅读量132

点赞数

文章标签： c/c++

原文链接：http://www.cnblogs.com/ajuanabc/archive/2007/05/15/2464365.html

版权

上面已经介绍打开文件输入，并且分析了读取到缓冲区里的代码，接着下来就是分析行号同步的处理，还有类型初始化。

先来看看生成中间文件 hello.i 中的源程序，在它的第 1 行和第 2 行如下：

#001 #line 1 "hello.c"

#002 #line 1 "include/stdio.h"

#003

#004

#005

#006

#007 typedef unsigned int size_t;

这样的源程序是怎么样被处理的呢？像 #line 这样的指令就是用来同步行号和文件名称的。现在就来分析函数 nextline ，它就会处理这样的源程序，让行号和源程序的文件名称同步更新，这样就可以定位源程序出错时所在的位置，比如在编译 C 程序时就可以看到在某行某列出错，然后双击鼠标，就可以跳到相应的源程序位置进行查看和修改了。如下所示：

Warning 1 warning C4101: 'dst' : unreferenced local variable g:/cnasm/cncc/src/alpha.c 4798

nextline 函数的源程序如下：

#001 void nextline(void)

#002 {

#003 do

#004 {

#005 if (cp >= limit)

#006 {

#007 fillbuf();

#008 if (cp >= limit)

#009 cp = limit;

#010 if (cp == limit)

#011 return;

#012 }

#013 else

#014 {

#015 lineno++;

#016 for (line = (char *)cp; *cp==' ' || *cp=='\t'; cp++)

#017 ;

#018

#019 if (*cp == '#')

#020 {

#021 resynch();

#022 nextline();

#023 }

#024 }

#025

#026 } while (*cp == '\n' && cp == limit);

#027 }

第 5 行到第 12 行是分析缓冲区内容完成后，重新读取文件数据到缓冲区里。

第 15 行是增加源程序的行号，它就是用来表示记号所在的行号。

第 16 行是跳过连续的空格和制表符。

第 19 行到 23 行是处理行号同步和文件同步，后面接着分析它。

第 26 行是处理一行代码完成，再处理下一行代码。

下面接着看函数 resynch ，它负责处理以 # 开头的预处理指令：

#001 static void resynch(void)

#002 {

#003 for (cp++; *cp == ' ' || *cp == '\t'; )

#004 cp++;

#005

#006 if (limit - cp < MAXLINE)

#007 fillbuf();

#008

#009 if (strncmp((char *)cp, "pragma", 6) == 0)

#010 {

#011 cp += 6;

#012 pragma();

#013 }

#014 else if (strncmp((char *)cp, "ident", 5) == 0)

#015 {

#016 cp += 5;

#017 ident();

#018 }

#019 else if (*cp >= '0' && *cp <= '9')

#020 {

#021 line: for (lineno = 0; *cp >= '0' && *cp <= '9'; )

#022 lineno = 10*lineno + *cp++ - '0';

#023 lineno--;

#024 while (*cp == ' ' || *cp == '\t')

#025 cp++;

#026

#027 if (*cp == '"')

#028 {

#029 file = (char *)++cp;

#030 while (*cp && *cp != '"' && *cp != '\n')

#031 cp++;

#032 file = stringn(file, (char *)cp - file);

#033 if (*cp == '\n')

#034 warning("missing \" in preprocessor line\n");

#035 if (firstfile == 0)

#036 firstfile = file;

#037 }

#038 }

#039 else if (strncmp((char *)cp, "line", 4) == 0)

#040 {

#041 for (cp += 4; *cp == ' ' || *cp == '\t'; )

#042 cp++;

#043 if (*cp >= '0' && *cp <= '9')

#044 goto line;

#045 if (Aflag >= 2)

#046 warning("unrecognized control line\n");

#047 }

#048 else if (Aflag >= 2 && *cp != '\n')

#049 warning("unrecognized control line\n");

#050

#051 while (*cp)

#052 if (*cp++ == '\n')

#053 if (cp == limit + 1)

#054 {

#055 nextline();

#056 if (cp == limit)

#057 break;

#058 }

#059 else

#060 break;

#061 }

#062

第 3 行、第 4 行删除空格和制表符。

第 6 、 7 行是把行缓冲区填满。

第 9 行到第 13 行是处理参数 pragma 。

第 14 到第 18 行是处理参数 ident 。

第 39 行到第 47 行是处理 line 参数，然后跳到第 21 行的标号 line 里处理行号识别。比如下面的代码：

#line 1 "hello.c"

就是识别出 # 后，运行上面的函数，然后识别出 line 字符串，接着跳到标号 line 处进行处理，把后面的字符串 1 识别出来，把它转换为 10 进制值赋值给行号变量 lineno 。

第 27 行到第 37 行是识别后面的文件字符串 hello.c ，赋值给 file 。

这样就可以把上面的行号源程序处理完成，得到当前文件名称和当前行号，定位到源程序出错的位置了。

处理完上面的行号源程序后，就会调用类型初始化，如下：

type_init(argc, argv);

类型初始化，其实就是设置 C 编译器内部预先定义的基本类型。下面就来看看具体是怎么样的。

#001 void type_init(int argc, char *argv[])

#002 {

#003 static int inited;

#004 int i;

#005

#006 if (inited)

#007 return;

#008

#009 inited = 1;

#010 if (!IR)

#011 return;

#012

#013 for (i = 1; i < argc; i++)

#014 {

#015 int size, align, outofline;

#016 if (strncmp(argv[i], "-unsigned_char=", 15) == 0)

#017 IR->unsigned_char = argv[i][15] - '0';

#018

#019 #define xx(name) \

#020 else if (sscanf(argv[i], "-" #name "=%d,%d,%d", &size, &align, &outofline) == 3) { \

#021 IR->name.size = size; IR->name.align = align; \

#022 IR->name.outofline = outofline; }

#023 xx(charmetric)

#024 xx(shortmetric)

#025 xx(intmetric)

#026 xx(longmetric)

#027 xx(longlongmetric)

#028 xx(floatmetric)

#029 xx(doublemetric)

#030 xx(longdoublemetric)

#031 xx(ptrmetric)

#032 xx(structmetric)

#033 #undef xx

#034 }

#035

#036 #define xx(v,name,op,metrics) v=xxinit(op,name,IR->metrics)

#037 xx(chartype, "char", IR->unsigned_char ? UNSIGNED : INT,charmetric);

#038 xx(doubletype, "double", FLOAT, doublemetric);

#039 xx(floattype, "float", FLOAT, floatmetric);

#040 xx(inttype, "int", INT, intmetric);

#041 xx(longdouble, "long double", FLOAT, longdoublemetric);

#042 xx(longtype, "long int", INT, longmetric);

#043 xx(longlong, "long long int", INT, longlongmetric);

#044 xx(shorttype, "short", INT, shortmetric);

#045 xx(signedchar, "signed char", INT, charmetric);

#046 xx(unsignedchar, "unsigned char", UNSIGNED,charmetric);

#047 xx(unsignedlong, "unsigned long", UNSIGNED,longmetric);

#048 xx(unsignedshort, "unsigned short", UNSIGNED,shortmetric);

#049 xx(unsignedtype, "unsigned int", UNSIGNED,intmetric);

#050 xx(unsignedlonglong,"unsigned long long",UNSIGNED,longlongmetric);

#051 #undef xx

#052

#053 {

#054 Symbol p;

#055 p = install(string("void"), &types, GLOBAL, PERM);

#056 voidtype = type(VOID, NULL, 0, 0, p);

#057 p->type = voidtype;

#058 }

#059

#060 pointersym = install(string("T*"), &types, GLOBAL, PERM);

#061 pointersym->addressed = IR->ptrmetric.outofline;

#062 pointersym->u.limits.max.p = (void*)ones(8*IR->ptrmetric.size);

#063 pointersym->u.limits.min.p = 0;

#064 voidptype = ptr(voidtype);

#065 funcptype = ptr(func(voidtype, NULL, 1));

#066 charptype = ptr(chartype);

#067 #define xx(v,t) if (v==NULL && t->size==voidptype->size && t->align==voidptype->align) v=t

#068 xx(unsignedptr,unsignedshort);

#069 xx(unsignedptr,unsignedtype);

#070 xx(unsignedptr,unsignedlong);

#071 xx(unsignedptr,unsignedlonglong);

#072 if (unsignedptr == NULL)

#073 unsignedptr = type(UNSIGNED, NULL, voidptype->size, voidptype->align, voidptype->u.sym);

#074 xx(signedptr,shorttype);

#075 xx(signedptr,inttype);

#076 xx(signedptr,longtype);

#077 xx(signedptr,longlong);

#078 if (signedptr == NULL)

#079 signedptr = type(INT, NULL, voidptype->size, voidptype->align, voidptype->u.sym);

#080 #undef xx

#081 widechar = unsignedshort;

#082

#083 for (i = 0; i < argc; i++)

#084 {

#085 #define xx(name,type) \

#086 if (strcmp(argv[i], "-wchar_t=" #name) == 0) \

#087 widechar = type;

#088 xx(unsigned_char,unsignedchar)

#089 xx(unsigned_int,unsignedtype)

#090 xx(unsigned_short,unsignedshort)

#091 }

#092 #undef xx

#093 }

上面的代码主要把所有缺省的类型创建到一个表数据 types 里，把 char 、 double 、 float 、 int 类型初始化到那个表里。每个类型定义如下：

#001 struct type

#002 {

#003 int op;

#004 Type type;

#005 int align;

#006 int size;

#007 union

#008 {

#009 Symbol sym;

#010 struct

#011 {

#012 unsigned oldstyle:1;

#013 Type *proto;

#014 } f;

#015 } u;

#016 Xtype x;

#017 };

主要有类型对齐方式，类型占用大小，还有扩展类型。在 C 里，类型是很重要的，因为所有变量都是基于数据类型声明的，不同类型的属性不一致，这些都需要进行比较的。定义了这样的类型表，就比较好查找变量的类型，同时也可以节省编译时的存储空间。

到这里，就把类型初始化理解完成， C 语言是强类型的语言，所有变量和函数都需要先声明后使用，并且不同的类型是不等价的，相互之间需要进行转换。

现在已经把 C 编译器的初始化工作准备完成了，后面就开始读取源程序里的记号进行处理，也就是进入词法分析阶段，越来越精彩了。

转载于:https://www.cnblogs.com/ajuanabc/archive/2007/05/15/2464365.html

anjichan4261

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
LCC编译器的源程序分析(5)行号同步与类型初始化

上面已经介绍打开文件输入，并且分析了读取到缓冲区里的代码，接着下来就是分析行号同步的处理，还有类型初始化。先来看看生成中间文件hello.i中的源程序，在它的第1行和第2行如下：#001#line 1 "hello.c"#002#line 1 "include/stdio.h"#003#004#005#006#007typedef un...
复制链接

扫一扫