4.1.3.1.2.1.1.4. Create macro definition – ISO mode
As we have seen, standard (ISO) mode is token based, so every token found needs a data structure to hold its information, as shown below.
175 struct cpp_token in cpplib.h
176 {
177 fileline line; /* Logical line of first char of token. */
178 unsigned short col; /* Column of first char of token. */
179 ENUM_BITFIELD(cpp_ttype) type : CHAR_BIT; /* token type */
180 unsigned char flags; /* flags - see above */
181
182 union
183 {
184 cpp_hashnode *node; /* An identifier. */
185 const cpp_token *source; /* Inherit padding from this token. */
186 struct cpp_string str; /* A string, or number. */
187 unsigned int arg_no; /* Argument no. for a CPP_MACRO_ARG. */
188 } val;
189 };
_cpp_aligned_alloc allocates permanent, aligned storage of length len from a_buff in cpp_reader. That buffer is used for growing allocations when saving macro replacement lists in a #define, and when parsing an answer to an assertion in #assert, #unassert or #if (and therefore possibly whilst expanding macros).
cpp_reader holds three kinds of buffer: base_run, which caches normal tokens (including the macro name); a_buff, which holds macro expansion bodies and similar data whose tokens can’t be treated as normal ones; and u_buff, which is used for building strings during preprocessing. Thus, in _cpp_create_definition below, line 1486 invokes _cpp_aligned_alloc to allocate, from a_buff, the cpp_macro object that will describe the definition; the parameter list and expansion body parsed later are placed in a_buff as well.
1522 unsigned char *
1523 _cpp_aligned_alloc (cpp_reader *pfile, size_t len) in cpplex.c
1524 {
1525 _cpp_buff *buff = pfile->a_buff;
1526 unsigned char *result = buff->cur;
1527
1528 if (len > (size_t) (buff->limit - result))
1529 {
1530 buff = _cpp_get_buff (pfile, len);
1531 buff->next = pfile->a_buff;
1532 pfile->a_buff = buff;
1533 result = buff->cur;
1534 }
1535
1536 buff->cur = result + len;
1537 return result;
1538 }
_cpp_create_definition below reads in and handles all the tokens of the macro definition in a single invocation. That means that by the time it returns, we may have advanced quite far ahead in the source file.
1479 bool
1480 _cpp_create_definition (cpp_reader *pfile, cpp_hashnode *node) in cppmacro.c
1481 {
1482 cpp_macro *macro;
1483 unsigned int i;
1484 bool ok;
1485
1486 macro = (cpp_macro *) _cpp_aligned_alloc (pfile, sizeof (cpp_macro));
1487 macro->line = pfile->directive_line;
1488 macro->params = 0;
1489 macro->paramc = 0;
1490 macro->variadic = 0;
1491 macro->used = ! CPP_OPTION (pfile, warn_unused_macros);
1492 macro->count = 0;
1493 macro->fun_like = 0;
1494 /* To suppress some diagnostics. */
1495 macro->syshdr = pfile->map->sysp != 0;
1496
1497 if (CPP_OPTION (pfile, traditional))
1498 ok = _cpp_create_trad_definition (pfile, macro);
1499 else
1500 {
1501 cpp_token *saved_cur_token = pfile->cur_token;
1502
1503 ok = create_iso_definition (pfile, macro);
1504
1505 /* Restore lexer position because of games lex_expansion_token()
1506 plays lexing the macro. We set the type for SEEN_EOL() in
1507 cpplib.c.
1508
1509 Longer term we should lex the whole line before coming here,
1510 and just copy the expansion. */
1511 saved_cur_token[-1].type = pfile->cur_token[-1].type;
1512 pfile->cur_token = saved_cur_token;
1513
1514 /* Stop the lexer accepting __VA_ARGS__. */
1515 pfile->state.va_args_ok = 0;
1516 }
4.1.3.1.2.1.4.4.1. Fetch token
GCC implements a preprocessor that expands macros as the source is read. During this, tokens are normally cached in the token runs starting at base_run, except for the tokens of a macro definition's expansion body, which must be kept in another place (a_buff) so that they can be used quickly for replacement whenever an expansion is required. Thus, at line 1501 above, it first saves the pointer to the current token, and restores it at line 1512 after parsing the macro; the tokens of the definition that were cached in the token run will then be overwritten by newly arriving tokens, so as to avoid recursion in the macro definition.
1383 static bool
1384 create_iso_definition (cpp_reader *pfile, cpp_macro *macro) in cppmacro.c
1385 {
1386 cpp_token *token;
1387 const cpp_token *ctoken;
1388
1389 /* Get the first token of the expansion (or the '(' of a
1390 function-like macro). */
1391 ctoken = _cpp_lex_token (pfile);
At line 704 below, lookaheads is nonzero if we have looked ahead and already fetched tokens beforehand; otherwise we need to fetch a token from the buffer of pfile with _cpp_lex_direct (remember that the macro definition from the PCH file is now in the buffer of pfile).
691 const cpp_token *
692 _cpp_lex_token (cpp_reader *pfile) in cpplex.c
693 {
694 cpp_token *result;
695
696 for (;;)
697 {
698 if (pfile->cur_token == pfile->cur_run->limit)
699 {
700 pfile->cur_run = next_tokenrun (pfile->cur_run);
701 pfile->cur_token = pfile->cur_run->base;
702 }
703
704 if (pfile->lookaheads)
705 {
706 pfile->lookaheads--;
707 result = pfile->cur_token++;
708 }
709 else
710 result = _cpp_lex_direct (pfile);
711
712 if (result->flags & BOL)
713 {
714 /* Is this a directive. If _cpp_handle_directive returns
715 false, it is an assembler #. */
716 if (result->type == CPP_HASH
717 /* 6.10.3 p 11: Directives in a list of macro arguments
718 gives undefined behavior. This implementation
719 handles the directive as normal. */
720 && pfile->state.parsing_args != 1
721 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
722 continue;
723 if (pfile->cb.line_change && !pfile->state.skipping)
724 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
725 }
726
727 /* We don't skip tokens in directives. */
728 if (pfile->state.in_directive)
729 break;
730
731 /* Outside a directive, invalidate controlling macros. At file
732 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
733 get here and MI optimization works. */
734 pfile->mi_valid = false;
735
736 if (!pfile->state.skipping || result->type == CPP_EOF)
737 break;
738 }
739
740 return result;
741 }
At line 712 above, BOL means the token is at the beginning of a line, and CPP_HASH at line 716 means the token is ‘#’ (so together with the following tokens it may form a directive). At line 720, a nonzero parsing_args in state means we are parsing the arguments of a function-like macro. A directive inside a list of macro arguments has undefined behavior; here, when parsing_args is 1, the ‘#’ is not handled as a directive and is simply returned as an ordinary token. Otherwise, it tries to handle the incoming stream of tokens as a directive.
However, in a PCH file no hash (#) token will be found (all directives have already been handled and expanded while generating the PCH file). A macro in a PCH file may look like the following example:
D4 00 00 00 08 00 00 00 “strndupa(s,n) (__extension__ ({ __const char *__old = (s); size_t __len = strnlen (__old, (n)); char *__new = (char *) __builtin_alloca (__len + 1); __new[__len] = '\0'; (char *) memcpy (__new, __old, __len); }))” 0C
In the above string, the data enclosed by “” is ASCII text and the rest is binary. The bytes D4 00 00 00 08 00 00 00 form a macrodef_struct. As the data was captured on a Linux platform and is stored in little-endian byte order, D4 00 00 00 is the value 0xd4, which is definition_length; the following 08 00 is the value 8, which is name_length; and 00 00 is flags, with value 0.
Then, on a line change (fetching the first token of a line), it also needs to update the line number. But here that is meaningless, as we are in a PCH file.
4.1.3.1.2.1.4.4.2. Parse parameters
For a non-function-like macro, no arguments are present; it is handled by the branch at line 1404.
create_iso_definition (continue)
1393 if (ctoken->type == CPP_OPEN_PAREN && !(ctoken->flags & PREV_WHITE))
1394 {
1395 bool ok = parse_params (pfile, macro);
1396 macro->params = (cpp_hashnode **) BUFF_FRONT (pfile->a_buff);
1397 if (!ok)
1398 return false;
1399
1400 /* Success. Commit the parameter array. */
1401 BUFF_FRONT (pfile->a_buff) = (uchar *) &macro->params[macro->paramc];
1402 macro->fun_like = 1;
1403 }
1404 else if (ctoken->type != CPP_EOF && !(ctoken->flags & PREV_WHITE))
1405 cpp_error (pfile, CPP_DL_PEDWARN,
1406 "ISO C requires whitespace after the macro name");
For a function-like macro, the left parenthesis must not be separated from the macro’s name by white space (spaces, tabs and, in ISO mode, comments). For the correct form, parse_params parses the parameter list.
1271 static bool
1272 parse_params (cpp_reader *pfile, cpp_macro *macro) in cppmacro.c
1273 {
1274 unsigned int prev_ident = 0;
1275
1276 for (;;)
1277 {
1278 const cpp_token *token = _cpp_lex_token (pfile);
1279
1280 switch (token->type)
1281 {
1282 default:
1283 /* Allow/ignore comments in parameter lists if we are
1284 preserving comments in macro expansions. */
1285 if (token->type == CPP_COMMENT
1286 && ! CPP_OPTION (pfile, discard_comments_in_macro_exp))
1287 continue;
1288
1289 cpp_error (pfile, CPP_DL_ERROR,
1290 "\"%s\" may not appear in macro parameter list",
1291 cpp_token_as_text (pfile, token));
1292 return false;
1293
1294 case CPP_NAME:
1295 if (prev_ident)
1296 {
1297 cpp_error (pfile, CPP_DL_ERROR,
1298 "macro parameters must be comma-separated");
1299 return false;
1300 }
1301 prev_ident = 1;
1302
1303 if (_cpp_save_parameter (pfile, macro, token->val.node))
1304 return false;
1305 continue;
1306
1307 case CPP_CLOSE_PAREN:
1308 if (prev_ident || macro->paramc == 0)
1309 return true;
1310
1311 /* Fall through to pick up the error. */
1312 case CPP_COMMA:
1313 if (!prev_ident)
1314 {
1315 cpp_error (pfile, CPP_DL_ERROR, "parameter name missing");
1316 return false;
1317 }
1318 prev_ident = 0;
1319 continue;
1320
1321 case CPP_ELLIPSIS:
1322 macro->variadic = 1;
1323 if (!prev_ident)
1324 {
1325 _cpp_save_parameter (pfile, macro,
1326 pfile->spec_nodes.n__VA_ARGS__);
1327 pfile->state.va_args_ok = 1;
1328 if (!CPP_OPTION (pfile, c99) && CPP_OPTION (pfile, pedantic))
1329 cpp_error (pfile, CPP_DL_PEDWARN,
1330 "anonymous variadic macros were introduced in C99");
1331 }
1332 else if (CPP_OPTION (pfile, pedantic))
1333 cpp_error (pfile, CPP_DL_PEDWARN,
1334 "ISO C does not permit named variadic macros");
1335
1336 /* We're at the end, and just expect a closing parenthesis. */
1337 token = _cpp_lex_token (pfile);
1338 if (token->type == CPP_CLOSE_PAREN)
1339 return true;
1340 /* Fall through. */
1341
1342 case CPP_EOF:
1343 cpp_error (pfile, CPP_DL_ERROR, "missing ')' in macro parameter list");
1344 return false;
1345 }
1346 }
1347 }
In this function, the variable prev_ident indicates that the last token found was an identifier, and variadic at line 1322 means the macro looks like a variadic function – it contains ... in its parameter list. Variadic macros are described in detail below, as given in [4].
A macro can be declared to accept a variable number of arguments much as a function can. The syntax for defining the macro is similar to that of a function. Here is an example: #define eprintf(...) fprintf (stderr, __VA_ARGS__) This kind of macro is called variadic. When the macro is invoked, all the tokens in its argument list after the last named argument (this macro has none), including any commas, become the variable argument. This sequence of tokens replaces the identifier __VA_ARGS__ in the macro body wherever it appears. Thus, we have this expansion: eprintf ("%s:%d: ", input_file, lineno) → fprintf (stderr, "%s:%d: ", input_file, lineno) The variable argument is completely macro-expanded before it is inserted into the macro expansion, just like an ordinary argument. You may use the ‘#’ and ‘##’ operators to stringify the variable argument or to paste its leading or trailing token with another token. (But see below for an important special case for ‘##’.) If your macro is complicated, you may want a more descriptive name for the variable argument than __VA_ARGS__. GNU CPP permits this, as an extension. You may write an argument name immediately before the ‘...’; that name is used for the variable argument. The eprintf macro above could be written #define eprintf(args...) fprintf (stderr, args) using this extension. You cannot use __VA_ARGS__ and this extension in the same macro. You can have named arguments as well as variable arguments in a variadic macro. We could define eprintf like this, instead: #define eprintf(format, ...) fprintf(stderr, format, __VA_ARGS__) This formulation looks more descriptive, but unfortunately it is less flexible: you must now supply at least one argument after the format string. In standard C, you cannot omit the comma separating the named argument from the variable arguments. Furthermore, if you leave the variable argument empty, you will get a syntax error, because there will be an extra comma after the format string.
eprintf("success!\n", ); → fprintf(stderr, "success!\n", ); GNU CPP has a pair of extensions which deal with this problem. First, you are allowed to leave the variable argument out entirely: eprintf("success!\n") → fprintf(stderr, "success!\n", ); Second, the ‘##’ token paste operator has a special meaning when placed between a comma and a variable argument. If you write #define eprintf(format, ...) fprintf(stderr, format, ##__VA_ARGS__) and the variable argument is left out when the eprintf macro is used, then the comma before the ‘##’ will be deleted. This does not happen if you pass an empty argument, nor does it happen if the token preceding ‘##’ is anything other than a comma. C99 mandates that the only place the identifier __VA_ARGS__ can appear is in the replacement list of a variadic macro. It may not be used as a macro name, macro argument name, or within a different type of macro. It may also be forbidden in open text; the standard is ambiguous. We recommend you avoid using it except for its defined purpose. Variadic macros are a new feature in C99. GNU CPP has supported them for a long time, but only with a named variable argument (‘args...’, not ‘...’ and __VA_ARGS__). If you are concerned with portability to previous versions of GCC, you should use only named variable arguments. On the other hand, if you are concerned with portability to other conforming implementations of C99, you should use only __VA_ARGS__. Previous versions of GNU CPP implemented the comma-deletion extension much more generally. We have restricted it in this release to minimize the differences from C99. To get the same effect with both this and previous versions of GCC, the token preceding the special ‘##’ must be a comma, and there must be white space between that comma and whatever comes immediately before it: #define eprintf(format, args...) fprintf (stderr, format, ##args)
For each parameter found, _cpp_lex_token, called at line 1278 in parse_params above, has already created a cpp_hashnode for the identifier (if a node with that name already exists it is returned instead, and it may represent another entity of the same name in the source) and saved it in the node field of val in cpp_token.
1238 bool in cppmacro.c
1239 _cpp_save_parameter (cpp_reader *pfile, cpp_macro *macro, cpp_hashnode *node)
1240 {
1241 unsigned int len;
1242 /* Constraint 6.10.3.6 - duplicate parameter names. */
1243 if (node->flags & NODE_MACRO_ARG)
1244 {
1245 cpp_error (pfile, CPP_DL_ERROR, "duplicate macro parameter \"%s\"",
1246 NODE_NAME (node));
1247 return true;
1248 }
1249
1250 if (BUFF_ROOM (pfile->a_buff)
1251 < (macro->paramc + 1) * sizeof (cpp_hashnode *))
1252 _cpp_extend_buff (pfile, &pfile->a_buff, sizeof (cpp_hashnode *));
1253
1254 ((cpp_hashnode **) BUFF_FRONT (pfile->a_buff))[macro->paramc++] = node;
1255 node->flags |= NODE_MACRO_ARG;
1256 len = macro->paramc * sizeof (union _cpp_hashnode_value);
1257 if (len > pfile->macro_buffer_len)
1258 {
1259 pfile->macro_buffer = xrealloc (pfile->macro_buffer, len);
1260 pfile->macro_buffer_len = len;
1261 }
1262 ((union _cpp_hashnode_value *) pfile->macro_buffer)[macro->paramc - 1]
1263 = node->value;
1264
1265 node->value.arg_index = macro->paramc;
1266 return false;
1267 }
In _cpp_save_parameter, the cpp_hashnode node and its value are saved into the aligned buffer (a_buff) and the macro_buffer of cpp_reader respectively (the value is saved because this node may also stand for another syntactic element). macro->paramc records the number of parameters, and at line 1265 the cpp_hashnode is updated to hold the sequence number of the parameter.
After the parameter list has been parsed, the macro expansion body follows. It is handled by lex_expansion_token, which first invokes alloc_expansion_token to get free space for the coming token in the aligned buffer of cpp_reader. Note that at line 1366 below, pfile->cur_token, which previously pointed into the buffer of base_run, now points into a_buff.
1350 static cpp_token *
1351 alloc_expansion_token (cpp_reader *pfile, cpp_macro *macro) in cppmacro.c
1352 {
1353 if (BUFF_ROOM (pfile->a_buff) < (macro->count + 1) * sizeof (cpp_token))
1354 _cpp_extend_buff (pfile, &pfile->a_buff, sizeof (cpp_token));
1355
1356 return &((cpp_token *) BUFF_FRONT (pfile->a_buff))[macro->count++];
1357 }
BUFF_FRONT above is defined as #define BUFF_FRONT(BUFF) ((BUFF)->cur). And macro->count records the number of tokens in macro expansion body.
1361 static cpp_token *
1362 lex_expansion_token (cpp_reader *pfile, cpp_macro *macro) in cppmacro.c
1363 {
1364 cpp_token *token;
1365
1366 pfile->cur_token = alloc_expansion_token (pfile, macro);
1367 token = _cpp_lex_direct (pfile);
1368
1369 /* Is this a parameter? */
1370 if (token->type == CPP_NAME
1371 && (token->val.node->flags & NODE_MACRO_ARG) != 0)
1372 {
1373 token->type = CPP_MACRO_ARG;
1374 token->val.arg_no = token->val.node->value.arg_index;
1375 }
1376 else if (CPP_WTRADITIONAL (pfile) && macro->paramc > 0
1377 && (token->type == CPP_STRING || token->type == CPP_CHAR))
1378 check_trad_stringification (pfile, macro, &token->val.str);
1379
1380 return token;
1381 }
Every identifier is represented by a unique cpp_hashnode, which ident_hash guarantees; and the identifier of a parameter has NODE_MACRO_ARG set in its flags by _cpp_save_parameter at line 1255. So a token found in the expansion body that has the same name as a parameter must be marked as CPP_MACRO_ARG. Note that for a token of this kind, its value is set at line 1374 to the sequence number of the parameter, so the corresponding argument can be accessed quickly in later processing.