ANSI C grammar, Lex specification
In 1985, Jeff Lee published this Lex specification together with a Yacc grammar for the April 30, 1985 ANSI C draft. Tom Stockfisch reposted both to net.sources in 1987; that original, as mentioned in the answer to question 17.25 of the comp.lang.c FAQ, can be ftp'ed from ftp.uu.net, file usenet/net.sources/ansi.c.grammar.Z.
I intend to keep this version as close to the current C Standard grammar as possible; please let me know if you discover discrepancies.
Jutta Degener, 1995
%{
/*
 * Scanner prologue, copied verbatim into the generated C file.
 *
 * y.tab.h supplies the token codes returned by the rule actions.
 * count(), comment() and check_type() are defined in the user-code
 * section but are called from rule actions, so each is declared here
 * before the generated yylex() refers to it.
 *
 * Named patterns defined below:
 *   D   one decimal digit
 *   L   one letter or underscore (identifier start character)
 *   H   one hexadecimal digit
 *   E   exponent part: e or E, an optional sign, one or more digits
 *   FS  floating suffix: one of f F l L
 *   IS  integer suffix: any run (possibly empty) of u U l L
 */
#include <stdio.h>
#include "y.tab.h"

void count(void);
int comment(void);
int check_type(void);
%}

D [0-9]
L [a-zA-Z_]
H [a-fA-F0-9]
E [Ee][+-]?{D}+
FS (f|F|l|L)
IS (u|U|l|L)*
%%
	/*
	 * Rules section.  Except for the comment opener and the final
	 * catch-all, every action calls count() before returning its token;
	 * count() updates the column counter and echoes the lexeme.
	 */
"/*" { comment(); }
	/*
	 * Keywords.  They come before the identifier rule: a keyword and
	 * the identifier pattern match the same length, and lex resolves
	 * such ties in favour of the rule listed first.
	 */
"auto" { count(); return(AUTO); }
"break" { count(); return(BREAK); }
"case" { count(); return(CASE); }
"char" { count(); return(CHAR); }
"const" { count(); return(CONST); }
"continue" { count(); return(CONTINUE); }
"default" { count(); return(DEFAULT); }
"do" { count(); return(DO); }
"double" { count(); return(DOUBLE); }
"else" { count(); return(ELSE); }
"enum" { count(); return(ENUM); }
"extern" { count(); return(EXTERN); }
"float" { count(); return(FLOAT); }
"for" { count(); return(FOR); }
"goto" { count(); return(GOTO); }
"if" { count(); return(IF); }
"int" { count(); return(INT); }
"long" { count(); return(LONG); }
"register" { count(); return(REGISTER); }
"return" { count(); return(RETURN); }
"short" { count(); return(SHORT); }
"signed" { count(); return(SIGNED); }
"sizeof" { count(); return(SIZEOF); }
"static" { count(); return(STATIC); }
"struct" { count(); return(STRUCT); }
"switch" { count(); return(SWITCH); }
"typedef" { count(); return(TYPEDEF); }
"union" { count(); return(UNION); }
"unsigned" { count(); return(UNSIGNED); }
"void" { count(); return(VOID); }
"volatile" { count(); return(VOLATILE); }
"while" { count(); return(WHILE); }
	/* Identifiers; check_type() picks the token code to return. */
{L}({L}|{D})* { count(); return(check_type()); }
	/*
	 * Constants: hexadecimal, octal and decimal integers, character
	 * constants (optionally wide), then the floating forms.  All of
	 * them are reported as the single token CONSTANT.
	 */
0[xX]{H}+{IS}? { count(); return(CONSTANT); }
0{D}+{IS}? { count(); return(CONSTANT); }
{D}+{IS}? { count(); return(CONSTANT); }
L?'(\\.|[^\\'])+' { count(); return(CONSTANT); }
{D}+{E}{FS}? { count(); return(CONSTANT); }
{D}*"."{D}+({E})?{FS}? { count(); return(CONSTANT); }
{D}+"."{D}*({E})?{FS}? { count(); return(CONSTANT); }
	/* String literals, optionally wide (L prefix). */
L?\"(\\.|[^\\"])*\" { count(); return(STRING_LITERAL); }
	/*
	 * Multi-character operators get named tokens.  Longest match
	 * makes ">>=" win over ">>" and ">".
	 */
"..." { count(); return(ELLIPSIS); }
">>=" { count(); return(RIGHT_ASSIGN); }
"<<=" { count(); return(LEFT_ASSIGN); }
"+=" { count(); return(ADD_ASSIGN); }
"-=" { count(); return(SUB_ASSIGN); }
"*=" { count(); return(MUL_ASSIGN); }
"/=" { count(); return(DIV_ASSIGN); }
"%=" { count(); return(MOD_ASSIGN); }
"&=" { count(); return(AND_ASSIGN); }
"^=" { count(); return(XOR_ASSIGN); }
"|=" { count(); return(OR_ASSIGN); }
">>" { count(); return(RIGHT_OP); }
"<<" { count(); return(LEFT_OP); }
"++" { count(); return(INC_OP); }
"--" { count(); return(DEC_OP); }
"->" { count(); return(PTR_OP); }
"&&" { count(); return(AND_OP); }
"||" { count(); return(OR_OP); }
"<=" { count(); return(LE_OP); }
">=" { count(); return(GE_OP); }
"==" { count(); return(EQ_OP); }
"!=" { count(); return(NE_OP); }
	/*
	 * Single-character tokens are returned as their own character
	 * code.  The digraphs <% %> <: :> are returned as the brace or
	 * bracket they stand for.
	 */
";" { count(); return(';'); }
("{"|"<%") { count(); return('{'); }
("}"|"%>") { count(); return('}'); }
"," { count(); return(','); }
":" { count(); return(':'); }
"=" { count(); return('='); }
"(" { count(); return('('); }
")" { count(); return(')'); }
("["|"<:") { count(); return('['); }
("]"|":>") { count(); return(']'); }
"." { count(); return('.'); }
"&" { count(); return('&'); }
"!" { count(); return('!'); }
"~" { count(); return('~'); }
"-" { count(); return('-'); }
"+" { count(); return('+'); }
"*" { count(); return('*'); }
"/" { count(); return('/'); }
"%" { count(); return('%'); }
"<" { count(); return('<'); }
">" { count(); return('>'); }
"^" { count(); return('^'); }
"|" { count(); return('|'); }
"?" { count(); return('?'); }
	/* White space is counted and echoed but produces no token. */
[ \t\v\n\f] { count(); }
	/*
	 * Catch-all: any other character is dropped without calling
	 * count(), so it is neither echoed nor added to the column.
	 */
. { /* ignore bad characters */ }
%%
/*
 * End-of-input hook called by the generated scanner.
 * Returning 1 tells the scanner there is no further input to switch to.
 */
int yywrap(void)
{
	return 1;
}
/*
 * Consume the body of a block comment.  Called from the rule that has
 * just matched the comment opener.
 *
 * Characters other than '*' are written to stdout as they are read.
 * A '*' is never written; the '/' that closes the comment is.  A
 * character that follows a '*' but is not '/' is pushed back and
 * rescanned, so a run such as "**" followed by '/' still terminates
 * the comment.
 *
 * The result of input() is kept in an int and compared against both 0
 * and EOF: scanner generators differ in which of the two they return at
 * end of input, and a plain char cannot represent EOF reliably.  An
 * unterminated comment therefore ends the function instead of looping.
 *
 * Always returns 0; the value is not used by the caller.
 */
int comment(void)
{
	int c;

	for (;;) {
		c = input();
		if (c == 0 || c == EOF)
			return 0;	/* input ended inside the comment */

		if (c != '*') {
			putchar(c);
			continue;
		}

		/* A '*' was read: the comment ends only if '/' is next. */
		c = input();
		if (c == '/') {
			putchar(c);
			return 0;
		}
		if (c == 0 || c == EOF)
			return 0;	/* input ended right after '*' */

		unput(c);	/* not a terminator; rescan this character */
	}
}
/* Output column reached so far on the current line (0 at line start). */
int column = 0;

/*
 * Advance `column` over the lexeme in yytext, then echo the lexeme.
 *
 * A newline resets the column to 0, a tab moves it to the next
 * multiple of 8, and any other character advances it by one.
 */
void count()
{
	const char *p;

	for (p = yytext; *p != '\0'; p++) {
		switch (*p) {
		case '\n':
			column = 0;
			break;
		case '\t':
			column += 8 - (column % 8);
			break;
		default:
			column++;
			break;
		}
	}

	ECHO;
}
/*
 * Choose the token code for the identifier currently in yytext.
 *
 * Intended behaviour (not implemented here):
 *
 *	if yytext is a type name
 *		return TYPE_NAME;
 *	otherwise
 *		return IDENTIFIER;
 *
 * Actual behaviour: every identifier is reported as IDENTIFIER.
 */
int check_type()
{
	return IDENTIFIER;
}
ANSI C Yacc grammar
In 1985, Jeff Lee published his Yacc grammar (which is accompanied by a matching Lex specification) for the April 30, 1985 draft version of the ANSI C standard. Tom Stockfisch reposted it to net.sources in 1987; that original, as mentioned in the answer to question 17.25 of the comp.lang.c FAQ, can be ftp'ed from ftp.uu.net, file usenet/net.sources/ansi.c.grammar.Z.
Jutta Degener, 1995
/*
 * Token declarations.  Each name is a token code returned by the
 * scanner; single-character tokens are returned as character
 * literals and need no declaration.
 */
%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF
%token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
%token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
%token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
%token XOR_ASSIGN OR_ASSIGN TYPE_NAME

%token TYPEDEF EXTERN STATIC AUTO REGISTER
%token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
%token STRUCT UNION ENUM ELLIPSIS

%token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN

/* The parse succeeds when the whole input reduces to translation_unit. */
%start translation_unit
%%
/*
 * Expressions.  Operator precedence is encoded by the nesting of the
 * rules: each level is defined in terms of the next tighter one, and
 * left recursion gives the binary operators left associativity.
 */

primary_expression
	: IDENTIFIER
	| CONSTANT
	| STRING_LITERAL
	| '(' expression ')'
	;

postfix_expression
	: primary_expression
	| postfix_expression '[' expression ']'
	| postfix_expression '(' ')'
	| postfix_expression '(' argument_expression_list ')'
	| postfix_expression '.' IDENTIFIER
	| postfix_expression PTR_OP IDENTIFIER
	| postfix_expression INC_OP
	| postfix_expression DEC_OP
	;

argument_expression_list
	: assignment_expression
	| argument_expression_list ',' assignment_expression
	;

unary_expression
	: postfix_expression
	| INC_OP unary_expression
	| DEC_OP unary_expression
	| unary_operator cast_expression
	| SIZEOF unary_expression
	| SIZEOF '(' type_name ')'
	;

unary_operator
	: '&'
	| '*'
	| '+'
	| '-'
	| '~'
	| '!'
	;

cast_expression
	: unary_expression
	| '(' type_name ')' cast_expression
	;

multiplicative_expression
	: cast_expression
	| multiplicative_expression '*' cast_expression
	| multiplicative_expression '/' cast_expression
	| multiplicative_expression '%' cast_expression
	;

additive_expression
	: multiplicative_expression
	| additive_expression '+' multiplicative_expression
	| additive_expression '-' multiplicative_expression
	;

shift_expression
	: additive_expression
	| shift_expression LEFT_OP additive_expression
	| shift_expression RIGHT_OP additive_expression
	;

relational_expression
	: shift_expression
	| relational_expression '<' shift_expression
	| relational_expression '>' shift_expression
	| relational_expression LE_OP shift_expression
	| relational_expression GE_OP shift_expression
	;

equality_expression
	: relational_expression
	| equality_expression EQ_OP relational_expression
	| equality_expression NE_OP relational_expression
	;

and_expression
	: equality_expression
	| and_expression '&' equality_expression
	;

exclusive_or_expression
	: and_expression
	| exclusive_or_expression '^' and_expression
	;

inclusive_or_expression
	: exclusive_or_expression
	| inclusive_or_expression '|' exclusive_or_expression
	;

logical_and_expression
	: inclusive_or_expression
	| logical_and_expression AND_OP inclusive_or_expression
	;

logical_or_expression
	: logical_and_expression
	| logical_or_expression OR_OP logical_and_expression
	;

conditional_expression
	: logical_or_expression
	| logical_or_expression '?' expression ':' conditional_expression
	;

/* Right recursion: assignment groups right to left. */
assignment_expression
	: conditional_expression
	| unary_expression assignment_operator assignment_expression
	;

assignment_operator
	: '='
	| MUL_ASSIGN
	| DIV_ASSIGN
	| MOD_ASSIGN
	| ADD_ASSIGN
	| SUB_ASSIGN
	| LEFT_ASSIGN
	| RIGHT_ASSIGN
	| AND_ASSIGN
	| XOR_ASSIGN
	| OR_ASSIGN
	;

expression
	: assignment_expression
	| expression ',' assignment_expression
	;

/* Syntactically a conditional_expression: no assignment or comma at top level. */
constant_expression
	: conditional_expression
	;
/*
 * Declarations: specifiers, declarators, struct/union/enum bodies,
 * parameter lists, abstract declarators (type names) and initializers.
 */

declaration
	: declaration_specifiers ';'
	| declaration_specifiers init_declarator_list ';'
	;

/* Storage classes, type specifiers and qualifiers may appear in any order. */
declaration_specifiers
	: storage_class_specifier
	| storage_class_specifier declaration_specifiers
	| type_specifier
	| type_specifier declaration_specifiers
	| type_qualifier
	| type_qualifier declaration_specifiers
	;

init_declarator_list
	: init_declarator
	| init_declarator_list ',' init_declarator
	;

init_declarator
	: declarator
	| declarator '=' initializer
	;

storage_class_specifier
	: TYPEDEF
	| EXTERN
	| STATIC
	| AUTO
	| REGISTER
	;

type_specifier
	: VOID
	| CHAR
	| SHORT
	| INT
	| LONG
	| FLOAT
	| DOUBLE
	| SIGNED
	| UNSIGNED
	| struct_or_union_specifier
	| enum_specifier
	| TYPE_NAME
	;

struct_or_union_specifier
	: struct_or_union IDENTIFIER '{' struct_declaration_list '}'
	| struct_or_union '{' struct_declaration_list '}'
	| struct_or_union IDENTIFIER
	;

struct_or_union
	: STRUCT
	| UNION
	;

struct_declaration_list
	: struct_declaration
	| struct_declaration_list struct_declaration
	;

struct_declaration
	: specifier_qualifier_list struct_declarator_list ';'
	;

/* Like declaration_specifiers, but without storage classes. */
specifier_qualifier_list
	: type_specifier specifier_qualifier_list
	| type_specifier
	| type_qualifier specifier_qualifier_list
	| type_qualifier
	;

struct_declarator_list
	: struct_declarator
	| struct_declarator_list ',' struct_declarator
	;

/* The ':' forms are bit-fields; the first of them is unnamed. */
struct_declarator
	: declarator
	| ':' constant_expression
	| declarator ':' constant_expression
	;

enum_specifier
	: ENUM '{' enumerator_list '}'
	| ENUM IDENTIFIER '{' enumerator_list '}'
	| ENUM IDENTIFIER
	;

enumerator_list
	: enumerator
	| enumerator_list ',' enumerator
	;

enumerator
	: IDENTIFIER
	| IDENTIFIER '=' constant_expression
	;

type_qualifier
	: CONST
	| VOLATILE
	;

declarator
	: pointer direct_declarator
	| direct_declarator
	;

direct_declarator
	: IDENTIFIER
	| '(' declarator ')'
	| direct_declarator '[' constant_expression ']'
	| direct_declarator '[' ']'
	| direct_declarator '(' parameter_type_list ')'
	| direct_declarator '(' identifier_list ')'
	| direct_declarator '(' ')'
	;

pointer
	: '*'
	| '*' type_qualifier_list
	| '*' pointer
	| '*' type_qualifier_list pointer
	;

type_qualifier_list
	: type_qualifier
	| type_qualifier_list type_qualifier
	;

/* A prototype parameter list, optionally ending in ", ...". */
parameter_type_list
	: parameter_list
	| parameter_list ',' ELLIPSIS
	;

parameter_list
	: parameter_declaration
	| parameter_list ',' parameter_declaration
	;

parameter_declaration
	: declaration_specifiers declarator
	| declaration_specifiers abstract_declarator
	| declaration_specifiers
	;

/* Old-style (non-prototype) parameter name list. */
identifier_list
	: IDENTIFIER
	| identifier_list ',' IDENTIFIER
	;

type_name
	: specifier_qualifier_list
	| specifier_qualifier_list abstract_declarator
	;

abstract_declarator
	: pointer
	| direct_abstract_declarator
	| pointer direct_abstract_declarator
	;

direct_abstract_declarator
	: '(' abstract_declarator ')'
	| '[' ']'
	| '[' constant_expression ']'
	| direct_abstract_declarator '[' ']'
	| direct_abstract_declarator '[' constant_expression ']'
	| '(' ')'
	| '(' parameter_type_list ')'
	| direct_abstract_declarator '(' ')'
	| direct_abstract_declarator '(' parameter_type_list ')'
	;

/* A braced initializer list may end with a trailing comma. */
initializer
	: assignment_expression
	| '{' initializer_list '}'
	| '{' initializer_list ',' '}'
	;

initializer_list
	: initializer
	| initializer_list ',' initializer
	;
/*
 * Statements.
 *
 * declaration_list, used by compound_statement below, is defined once,
 * at the end of the rules section.
 */

statement
	: labeled_statement
	| compound_statement
	| expression_statement
	| selection_statement
	| iteration_statement
	| jump_statement
	;

labeled_statement
	: IDENTIFIER ':' statement
	| CASE constant_expression ':' statement
	| DEFAULT ':' statement
	;

/* Within a block, all declarations precede all statements. */
compound_statement
	: '{' '}'
	| '{' statement_list '}'
	| '{' declaration_list '}'
	| '{' declaration_list statement_list '}'
	;

statement_list
	: statement
	| statement_list statement
	;

expression_statement
	: ';'
	| expression ';'
	;

/*
 * The two IF alternatives produce the classic dangling-else
 * shift/reduce conflict.  yacc resolves it by shifting, which attaches
 * each ELSE to the nearest unmatched IF.
 */
selection_statement
	: IF '(' expression ')' statement
	| IF '(' expression ')' statement ELSE statement
	| SWITCH '(' expression ')' statement
	;

/*
 * In FOR, the first two clauses are expression_statements, so each
 * carries its own ';' and may be empty; the third clause is optional.
 */
iteration_statement
	: WHILE '(' expression ')' statement
	| DO statement WHILE '(' expression ')' ';'
	| FOR '(' expression_statement expression_statement ')' statement
	| FOR '(' expression_statement expression_statement expression ')' statement
	;

jump_statement
	: GOTO IDENTIFIER ';'
	| CONTINUE ';'
	| BREAK ';'
	| RETURN ';'
	| RETURN expression ';'
	;
/*
 * External definitions: a translation unit is a non-empty sequence of
 * function definitions and declarations.
 */

translation_unit
	: external_declaration
	| translation_unit external_declaration
	;

external_declaration
	: function_definition
	| declaration
	;

/*
 * The alternatives with a declaration_list between the declarator and
 * the body are old-style definitions with separate parameter
 * declarations; those without declaration_specifiers omit the return
 * type.
 */
function_definition
	: declaration_specifiers declarator declaration_list compound_statement
	| declaration_specifiers declarator compound_statement
	| declarator declaration_list compound_statement
	| declarator compound_statement
	;

/* Also used by compound_statement for the declarations opening a block. */
declaration_list
	: declaration
	| declaration_list declaration
	;
%%
#include <stdio.h>
extern char yytext[];
extern int column;
/*
 * Report a parse error on stdout.
 *
 * stdout is flushed first.  Then, on a fresh line, a '^' is printed
 * right-justified in a field `column` wide, followed on the next line
 * by the message `s` right-justified in a field of the same width.
 *
 * s: the error message text.
 * Returns 0.
 */
int yyerror(const char *s)
{
	fflush(stdout);
	printf("\n%*s\n%*s\n", column, "^", column, s);
	return 0;
}