Studying note of GCC-3.4.6 source (35)

4.1.3.1.2.1.1.4.        Create macro definition – ISO mode

As we have seen, standard (ISO) mode is token based, so each token found needs a data structure to hold its information, as shown below.

 

175  struct cpp_token                                                                                        in cpplib.h

176  {

177    fileline line;              /* Logical line of first char of token.  */

178    unsigned short col;           /* Column of first char of token.  */

179    ENUM_BITFIELD(cpp_ttype) type : CHAR_BIT;  /* token type */

180    unsigned char flags;          /* flags - see above */

181 

182    union

183    {

184      cpp_hashnode *node;           /* An identifier.  */

185      const cpp_token *source;      /* Inherit padding from this token.  */

186      struct cpp_string str;      /* A string, or number.  */

187      unsigned int arg_no;      /* Argument no. for a CPP_MACRO_ARG.  */

188    } val;

189  };

 

_cpp_aligned_alloc allocates permanent, aligned storage of length len from a_buff in cpp_reader. That buffer is used for growing allocations when saving macro replacement lists in a #define, and when parsing an answer to an assertion in #assert, #unassert or #if (and therefore possibly whilst expanding macros).

cpp_reader holds base_run, used to cache normal tokens (including the macro name); a_buff, used to save macro expansion bodies etc., whose tokens can’t be treated as normal ones; and u_buff, which is used for producing strings during preprocessing. Thus, below in _cpp_create_definition, at line 1486, it invokes _cpp_aligned_alloc to allocate the cpp_macro object itself from a_buff before the definition is parsed.

 

1522 unsigned char *

1523 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)                                        in cpplex.c

1524 {

1525   _cpp_buff *buff = pfile->a_buff;

1526   unsigned char *result = buff->cur;

1527

1528   if (len > (size_t) (buff->limit - result))

1529   {

1530     buff = _cpp_get_buff (pfile, len);

1531     buff->next = pfile->a_buff;

1532     pfile->a_buff = buff;

1533     result = buff->cur;

1534   }

1535

1536   buff->cur = result + len;

1537   return result;

1538 }

 

Below _cpp_create_definition will read in and handle all tokens in the macro definition in one invocation. That means when we return from it, we may advance quite far ahead in the source file.

 

1479 bool

1480 _cpp_create_definition (cpp_reader *pfile, cpp_hashnode *node)            in cppmacro.c

1481 {

1482   cpp_macro *macro;

1483   unsigned int i;

1484   bool ok;

1485

1486   macro = (cpp_macro *) _cpp_aligned_alloc (pfile, sizeof (cpp_macro));

1487   macro->line = pfile->directive_line;

1488   macro->params = 0;

1489   macro->paramc = 0;

1490   macro->variadic = 0;

1491   macro->used = ! CPP_OPTION (pfile, warn_unused_macros);

1492   macro->count = 0;

1493   macro->fun_like = 0;

1494   /* To suppress some diagnostics.  */

1495   macro->syshdr = pfile->map->sysp != 0;

1496

1497   if (CPP_OPTION (pfile, traditional))

1498     ok = _cpp_create_trad_definition (pfile, macro);

1499   else

1500   {

1501     cpp_token *saved_cur_token = pfile->cur_token;

1502

1503     ok = create_iso_definition (pfile, macro);

1504

1505     /* Restore lexer position because of games lex_expansion_token()

1506       plays lexing the macro. We set the type for SEEN_EOL() in

1507       cpplib.c.

1508

1509       Longer term we should lex the whole line before coming here,

1510       and just copy the expansion.  */

1511     saved_cur_token[-1].type = pfile->cur_token[-1].type;

1512     pfile->cur_token = saved_cur_token;

1513

1514     /* Stop the lexer accepting __VA_ARGS__.  */

1515     pfile->state.va_args_ok = 0;

1516   }

4.1.3.1.2.1.4.4.1.  Fetch token

GCC implements a preprocessor that expands macros as the source is read. Ordinary tokens are cached in the token run (starting at base_run), whereas the tokens of a macro definition must be kept in another place (a_buff), so that they can be used quickly for the replacement whenever an expansion is required. Thus, at line 1501 above, it first saves the pointer to the current token and restores it at line 1512 after parsing the macro; from then on newly arriving tokens are again lexed into the normal token run rather than into a_buff, so the tokens lexed while processing the directive may be overwritten while the macro's tokens kept in a_buff are left alone.

 

1383 static bool

1384 create_iso_definition (cpp_reader *pfile, cpp_macro *macro)                 in cppmacro.c

1385 {

1386   cpp_token *token;

1387   const cpp_token *ctoken;

1388

1389   /* Get the first token of the expansion (or the '(' of a

1390     function-like macro).  */

1391   ctoken = _cpp_lex_token (pfile);

 

Below, at line 704, lookaheads is nonzero if we have looked ahead and already fetched tokens; otherwise we need to fetch a token from the buffer of pfile via _cpp_lex_direct (remember that the macro definition from the PCH file is now in the buffer of pfile).

 

691    const cpp_token *

692    _cpp_lex_token (cpp_reader *pfile)                                                          in cpplex.c

693    {

694      cpp_token *result;

695   

696      for (;;)

697      {

698        if (pfile->cur_token == pfile->cur_run->limit)

699        {

700          pfile->cur_run = next_tokenrun (pfile->cur_run);

701          pfile->cur_token = pfile->cur_run->base;

702        }

703   

704        if (pfile->lookaheads)

705        {

706          pfile->lookaheads--;

707          result = pfile->cur_token++;

708        }

709        else

710          result = _cpp_lex_direct (pfile);

711    

712        if (result->flags & BOL)

713        {

714          /* Is this a directive. If _cpp_handle_directive returns

715            false, it is an assembler #.  */

716          if (result->type == CPP_HASH

717            /* 6.10.3 p 11: Directives in a list of macro arguments

718              gives undefined behavior. This implementation

719              handles the directive as normal.  */

720               && pfile->state.parsing_args != 1

721               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))

722            continue;

723          if (pfile->cb.line_change && !pfile->state.skipping)

724            pfile->cb.line_change (pfile, result, pfile->state.parsing_args);

725        }

726   

727        /* We don't skip tokens in directives.  */

728        if (pfile->state.in_directive)

729          break;

730   

731        /* Outside a directive, invalidate controlling macros. At file

732          EOF, _cpp_lex_direct takes care of popping the buffer, so we never

733          get here and MI optimization works.  */

734        pfile->mi_valid = false;

735   

736        if (!pfile->state.skipping || result->type == CPP_EOF)

737          break;

738      }

739   

740      return result;

741    }

 

At line 712 above, BOL means the token is at the beginning of a line, and CPP_HASH at line 716 means the token is ‘#’ (so together with the following tokens, it may form a directive). At line 720, a nonzero parsing_args in state means we are parsing the arguments of a function-like macro. A directive inside a list of macro arguments has undefined behavior; here, when parsing_args is 1 the ‘#’ is not handled as a directive and is returned as an ordinary token. Otherwise, the incoming token stream is passed to _cpp_handle_directive to be handled as a directive.

However, in a PCH file no hash (#) token will be found (all directives have already been handled and expanded while generating the PCH file). A macro in a PCH file may look like the following example:

D4 00 00 00 08 00 00 00 “strndupa(s,n) (__extension__ ({ __const char *__old = (s); size_t __len = strnlen (__old, (n)); char *__new = (char *) __builtin_alloca (__len + 1); __new[__len] = '\0'; (char *) memcpy (__new, __old, __len); }))” 0C

In the above string, the data enclosed by “” is ASCII text and the rest is binary. D4 00 00 00 08 00 00 00 forms a macrodef_struct. As the data was captured on a Linux platform, the bytes are stored least-significant first: D4 00 00 00 is the value 0xd4, which is definition_length; the following 08 00 is the value 8, which is name_length; and 00 00 is flags, with value 0.

Then for line change (fetch first token of the line), it also needs to update the line number. But here it is meaningless as we are in PCH file.

4.1.3.1.2.1.4.4.2.  Parse parameters

For an object-like (non-function) macro, no parameters are present; this case is handled by the branch at line 1404.

 

create_iso_definition (continue)

 

1393   if (ctoken->type == CPP_OPEN_PAREN && !(ctoken->flags & PREV_WHITE))

1394   {

1395     bool ok = parse_params (pfile, macro);

1396     macro->params = (cpp_hashnode **) BUFF_FRONT (pfile->a_buff);

1397     if (!ok)

1398       return false;

1399

1400     /* Success. Commit the parameter array.  */

1401     BUFF_FRONT (pfile->a_buff) = (uchar *) &macro->params[macro->paramc];

1402     macro->fun_like = 1;

1403   }

1404   else if (ctoken->type != CPP_EOF && !(ctoken->flags & PREV_WHITE))

1405     cpp_error (pfile, CPP_DL_PEDWARN,

1406          "ISO C requires whitespace after the macro name");

 

For a function-like macro, the left parenthesis must not be separated from the macro’s name by white space (space, tab and, in ISO mode, a comment). For the correct form, parse_params will parse the parameter list.

 

1271 static bool

1272 parse_params (cpp_reader *pfile, cpp_macro *macro)                           in cppmacro.c

1273 {

1274   unsigned int prev_ident = 0;

1275

1276   for (;;)

1277   {

1278     const cpp_token *token = _cpp_lex_token (pfile);

1279

1280     switch (token->type)

1281     {

1282       default:

1283         /* Allow/ignore comments in parameter lists if we are

1284           preserving comments in macro expansions.  */

1285         if (token->type == CPP_COMMENT

1286            && ! CPP_OPTION (pfile, discard_comments_in_macro_exp))

1287           continue;

1288

1289         cpp_error (pfile, CPP_DL_ERROR,

1290                  "\"%s\" may not appear in macro parameter list",

1291                  cpp_token_as_text (pfile, token));

1292         return false;

1293

1294       case CPP_NAME:

1295         if (prev_ident)

1296         {

1297           cpp_error (pfile, CPP_DL_ERROR,

1298                   "macro parameters must be comma-separated");

1299           return false;

1300         }

1301         prev_ident = 1;

1302

1303         if (_cpp_save_parameter (pfile, macro, token->val.node))

1304           return false;

1305         continue;

1306

1307       case CPP_CLOSE_PAREN:

1308         if (prev_ident || macro->paramc == 0)

1309           return true;

1310

1311       /* Fall through to pick up the error.  */

1312       case CPP_COMMA:

1313         if (!prev_ident)

1314         {

1315           cpp_error (pfile, CPP_DL_ERROR, "parameter name missing");

1316           return false;

1317         }

1318         prev_ident = 0;

1319         continue;

1320

1321       case CPP_ELLIPSIS:

1322         macro->variadic = 1;

1323         if (!prev_ident)

1324         {

1325           _cpp_save_parameter (pfile, macro,

1326                             pfile->spec_nodes.n__VA_ARGS__);

1327           pfile->state.va_args_ok = 1;

1328           if (!CPP_OPTION (pfile, c99) && CPP_OPTION (pfile, pedantic))

1329             cpp_error (pfile, CPP_DL_PEDWARN,

1330                      "anonymous variadic macros were introduced in C99");

1331         }

1332         else if (CPP_OPTION (pfile, pedantic))

1333           cpp_error (pfile, CPP_DL_PEDWARN,

1334                    "ISO C does not permit named variadic macros");

1335

1336         /* We're at the end, and just expect a closing parenthesis.  */

1337         token = _cpp_lex_token (pfile);

1338         if (token->type == CPP_CLOSE_PAREN)

1339           return true;

1340         /* Fall through.  */

1341

1342       case CPP_EOF:

1343         cpp_error (pfile, CPP_DL_ERROR, "missing ')' in macro parameter list");

1344         return false;

1345     }

1346   }

1347 }

 

In this function, the variable prev_ident indicates that the last token found was an identifier, and variadic at line 1322 means the macro is variadic, like a variadic function – it contains ‘...’ in its parameter list. Variadic macros are described in detail in [4], quoted below.

A macro can be declared to accept a variable number of arguments much as a function can. The syntax for defining the macro is similar to that of a function. Here is an example:

#define eprintf(...) fprintf (stderr, __VA_ARGS__)

This kind of macro is called variadic. When the macro is invoked, all the tokens in its argument list after the last named argument (this macro has none), including any commas, become the variable argument. This sequence of tokens replaces the identifier __VA_ARGS__ in the macro body wherever it appears. Thus, we have this expansion:

eprintf ("%s:%d: ", input_file, lineno) → fprintf (stderr, "%s:%d: ", input_file, lineno)

The variable argument is completely macro-expanded before it is inserted into the macro expansion, just like an ordinary argument. You may use the ‘#’ and ‘##’ operators to stringify the variable argument or to paste its leading or trailing token with another token. (But see below for an important special case for ‘##’.)

If your macro is complicated, you may want a more descriptive name for the variable argument than __VA_ARGS__. GNU CPP permits this, as an extension. You may write an argument name immediately before the ‘...’; that name is used for the variable argument. The eprintf macro above could be written

#define eprintf(args...) fprintf (stderr, args)

using this extension. You cannot use __VA_ARGS__ and this extension in the same macro.

You can have named arguments as well as variable arguments in a variadic macro. We could define eprintf like this, instead:

#define eprintf(format, ...) fprintf(stderr, format, __VA_ARGS__)

This formulation looks more descriptive, but unfortunately it is less flexible: you must now supply at least one argument after the format string. In standard C, you cannot omit the comma separating the named argument from the variable arguments. Furthermore, if you leave the variable argument empty, you will get a syntax error, because there will be an extra comma after the format string.

eprintf("success!\n", ); → fprintf(stderr, "success!\n", );

GNU CPP has a pair of extensions which deal with this problem. First, you are allowed to leave the variable argument out entirely:

eprintf("success!\n") → fprintf(stderr, "success!\n", );

Second, the ‘##’ token paste operator has a special meaning when placed between a comma and a variable argument. If you write

#define eprintf(format, ...) fprintf(stderr, format, ##__VA_ARGS__)

And the variable argument is left out when the eprintf macro is used, then the comma before the ‘##’ will be deleted. This does not happen if you pass an empty argument, nor does it happen if the token preceding ‘##’ is anything other than a comma.

C99 mandates that the only place the identifier __VA_ARGS__ can appear is in the replacement list of a variadic macro. It may not be used as a macro name, macro argument name, or within a different type of macro. It may also be forbidden in open text; the standard is ambiguous. We recommend you avoid using it except for its defined purpose.

Variadic macros are a new feature in C99. GNU CPP has supported them for a long time, but only with a named variable argument (‘arg…’, not ‘…’ and __VA_ARGS__). If you are concerned with portability to previous versions of GCC, you should use only named variable arguments. On the other hand, if you are concerned with portability to other conforming implementations of C99, you should use only __VA_ARGS__.

Previous versions of GNU CPP implemented the comma-deletion extension much more generally. We have restricted it in this release to minimize the differences from C99. To get the same effect with both this and previous versions of GCC, the token preceding the special ‘##’ must be a comma, and there must be white space between that comma and whatever comes immediately before it:

#define eprintf(format, args...) fprintf (stderr, format, ##args)

For each parameter found, _cpp_lex_token, called at line 1278 in parse_params above, has already created a cpp_hashnode for the identifier (if a node of that name already exists it is returned instead, and it may represent another entity of the same name in the source) and saved it in the node field of val in the cpp_token.

 

1238 bool                                                                                                  in cppmacro.c

1239 _cpp_save_parameter (cpp_reader *pfile, cpp_macro *macro, cpp_hashnode *node)

1240 {

1241   unsigned int len;

1242   /* Constraint 6.10.3.6 - duplicate parameter names.  */

1243   if (node->flags & NODE_MACRO_ARG)

1244   {

1245     cpp_error (pfile, CPP_DL_ERROR, "duplicate macro parameter \"%s\"",

1246              NODE_NAME (node));

1247     return true;

1248   }

1249

1250   if (BUFF_ROOM (pfile->a_buff)

1251       < (macro->paramc + 1) * sizeof (cpp_hashnode *))

1252     _cpp_extend_buff (pfile, &pfile->a_buff, sizeof (cpp_hashnode *));

1253

1254   ((cpp_hashnode **) BUFF_FRONT (pfile->a_buff))[macro->paramc++] = node;

1255   node->flags |= NODE_MACRO_ARG;

1256   len = macro->paramc * sizeof (union _cpp_hashnode_value);

1257   if (len > pfile->macro_buffer_len)

1258   {

1259     pfile->macro_buffer = xrealloc (pfile->macro_buffer, len);

1260     pfile->macro_buffer_len = len;

1261   }

1262   ((union _cpp_hashnode_value *) pfile->macro_buffer)[macro->paramc - 1]

1263     = node->value;

1264   

1265   node->value.arg_index  = macro->paramc;

1266   return false;

1267 }

 

In _cpp_save_parameter, the cpp_hashnode and its current value are saved into the aligned buffer of cpp_reader (a_buff) and into macro_buffer of cpp_reader respectively (the value is saved because this node may also stand for another syntactic entity). macro->paramc records the number of parameters, and at line 1265 the cpp_hashnode is updated to hold the sequence number of the parameter (starting from 1).

After the parameter list comes the macro expansion body. It is lexed by lex_expansion_token, which first invokes alloc_expansion_token to obtain room for the coming token in the aligned buffer (a_buff) of cpp_reader. Note that at line 1366 below, pfile->cur_token, which previously pointed into the token run starting at base_run, now points into a_buff.

 

1350 static cpp_token *

1351 alloc_expansion_token (cpp_reader *pfile, cpp_macro *macro)              in cppmacro.c

1352 {

1353   if (BUFF_ROOM (pfile->a_buff) < (macro->count + 1) * sizeof (cpp_token))

1354     _cpp_extend_buff (pfile, &pfile->a_buff, sizeof (cpp_token));

1355

1356   return &((cpp_token *) BUFF_FRONT (pfile->a_buff))[macro->count++];

1357 }

 

BUFF_FRONT above is defined as #define BUFF_FRONT(BUFF) ((BUFF)->cur). And macro->count records the number of tokens in macro expansion body.

 

1361 static cpp_token *

1362 lex_expansion_token (cpp_reader *pfile, cpp_macro *macro)                 in cppmacro.c

1363 {

1364   cpp_token *token;

1365

1366   pfile->cur_token = alloc_expansion_token (pfile, macro);

1367   token = _cpp_lex_direct (pfile);

1368

1369   /* Is this a parameter?  */

1370   if (token->type == CPP_NAME

1371       && (token->val.node->flags & NODE_MACRO_ARG) != 0)

1372   {

1373     token->type = CPP_MACRO_ARG;

1374     token->val.arg_no = token->val.node->value.arg_index;

1375   }

1376   else if (CPP_WTRADITIONAL (pfile) && macro->paramc > 0

1377      && (token->type == CPP_STRING || token->type == CPP_CHAR))

1378     check_trad_stringification (pfile, macro, &token->val.str);

1379

1380   return token;

1381 }

 

Every identifier is represented by a unique cpp_hashnode, which is guaranteed by ident_hash, and the identifier of a parameter has NODE_MACRO_ARG set in its flags by _cpp_save_parameter at line 1255. So an identifier found in the expansion body that has the same name as a parameter is marked as CPP_MACRO_ARG, and its value becomes the sequence number of the parameter, set at line 1374, so that the corresponding argument can be accessed quickly in later processing.

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值