lexical scanner of string literal token in lcc

1. string literal definition in c language
    1> "hello world"                   // ok
    2> L"hello world"                 // ok (wide string literal, e.g. for Chinese or Japanese text)
    3> "i " "love " "you"               // ok, string concatenation applies
    4> "i " "love "
         "you"                                 // ok, string concatenation applies
    5> ""                                       // ok, empty string
    6> "hello
         world"                              // error
    7> "hello \
         tworld"                             // error of c definition, but compiles successfully in lcc & gcc [1]

 

2. call relationship

    expression    // expr.c, see primary(void)

            |---> STRING_LITERAL_DFA   //  gettok()

                              |--->  scanner        //  scon()

                                            |--->  get escape sequence   // backslash()

                                            |--->  copy string literal into buffer   // cput(), or wcput()

                                            |--->  storage buffer for string literal  // char cbuf[BUFSIZE+1], or wchar wcbuf[BUFSIZE+1]

 

3. comment on source code

// comments on scanner of string literal, in lex.c

// scon: scan one string or character literal whose delimiter is q, passing
// every character to put(); for strings, adjacent literals are concatenated.
// Stores a terminating 0 through put() and returns the cursor put() yields.
813 static void *scon(int q, void *put(int c, void *cl), void *cl) {
814     int n = 0, nbad = 0;              // n counts the characters scanned; nbad counts suspicious ones (see line 840)
815                                       // q is the enclosing character, for example '"' (double quote)
816     do {
817         cp++;                         // step over the opening delimiter
818         while (*cp != q) {
819             int c;
820             if (map[*cp]&NEWLINE) {
821                 if (cp < limit)             // newline before the closing delimiter (example 6> above): stop, error is reported at line 848
822                     break;
823                 cp++;
824                 nextline();
825                 if (cp == limit)          // reached EOF without matching the enclosing character: stop, error is reported at line 848
826                     break;
827                 continue;
828             }
829             c = *cp++;
830             if (c == '\\') {              // backslash starts an escape sequence
831                 if (map[*cp]&NEWLINE) {
832                     if (cp < limit)       // backslash directly followed by a newline (example 7> above): stop, error is reported at line 848
833                         break;
834                     cp++;
835                     nextline();
836                 }
837                 if (limit - cp < MAXTOKEN)   // fewer than MAXTOKEN characters left: refill before decoding the escape
838                     fillbuf();
839                 c = backslash(q);      // decode the escape sequence into a single character value
840             } else if (c < 0 || c > 255 || map[c] == 0)     // outside 0..255 or no map[] entry: counted here, warned about as non-portable at line 855
841                 nbad++;
842             if (n++ < BUFSIZE)           // characters beyond BUFSIZE are counted but not stored
843                 cl = put(c, cl);            // put is cput (into cbuf[BUFSIZE+1]) or wcput (into wcbuf[BUFSIZE+1])
844         }
845         if (*cp == q)
846             cp++;                         // step over the closing delimiter
847         else
848             error("missing %c\n", q);
849     } while (q == '"' && getchr() == '"');   // another '"' follows the string literal, so apply string concatenation
850     cl = put(0, cl);                  // terminate the stored literal
851     if (n >= BUFSIZE)
852         error("%s literal too long\n", q == '"' ? "string" : "character");
853     if (Aflag >= 2 && q == '"' && n > 509)
854         warning("more than 509 characters in a string literal\n");
855     if (Aflag >= 2 && nbad > 0)
856         warning("%s literal contains non-portable characters\n",
857             q == '"' ? "string" : "character");
858     return cl;
859 }

 

// comments on string literal DFA, in lex.c

156 int gettok(void) {

 ........

370         case '"': {
371             char *s = scon(*--cp, cput, cbuf);                       // s points to cbuf[] or wcbuf[]
372             tval.type = array(chartype, s - cbuf, 0);             // !! array(widechar, s - wcbuf, 0) for wide string literal
373             tval.u.c.v.p = cbuf;                                                // !! no string copy (1)
374             tsym = &tval;
375             return SCON;
376             }

 

// comments on literal string expression, in expr.c

339 static Tree primary(void) {
340     Tree p;
341
342     assert(t != '(');
343     switch (t) {
344     case ICON:

....

348     case SCON: if (ischar(tsym->type->type))     // !!! for ASCII character, tsym->type is a "char"
349             tsym->u.c.v.p = stringn(tsym->u.c.v.p, tsym->type->size);    // !!! string copy here (2)
350            else                                                               // !!! wide string character, tsym->type is a "widechar "
351             tsym->u.c.v.p = memcpy(allocate(tsym->type->size, PERM), tsym->u.c.v.p, tsym->type->    size);
352            tsym = constant(tsym->type, tsym->u.c.v);
353            if (tsym->u.c.loc == NULL)
354             tsym->u.c.loc = genident(STATIC, tsym->type, GLOBAL);
355            p = idtree(tsym->u.c.loc); break;


[1] invalid definition in c, but valid in c++; no error is reported because, after the C preprocessor has run, it becomes "hello \tworld"

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值