[educoder] 编译原理 用JAVACC生成并扩充C语言的语法分析器(二)

第2关 构建C语言的语法分析器

/*

  C grammar definition for use with JavaCC
  Contributed by Doug South (dsouth@squirrel.com.au) 21/3/97

  This parser assumes that the C source file has been preprocessed : all
  #includes have been included and all macros have been expanded. I accomplish
  this with "gcc -P -E <source file> > <output file>".

  There is a problem with compiler specific types, such as __signed, __const,
  __inline__, etc. These types can be added as typedef types before the parser
  is run on a file. See main() for an example. I have also found a strange little
  compiler specific "type" if you can call it that. It is __attribute__, but it
  does not seem to be used as a type. I found that just deleting the __attribute__
  and the following "offensive" code works.

  This grammar also prints out all the types defined while parsing the file. This
  is done via a call to printTypes() when the parser is complete. If you do not want
  this, just comment out the printTypes() method call in the production rule
  TranslationUnit(), which BTW is the root node for parsing a C source file.

  I have not in anyway extensively tested this grammar, in fact it is barely tested,
  but I imagine it is better to have a starting point for a C grammar other than from
  scratch. It has not been optimized in anyway, my main aim was to get a parser that
  works. Lookahead may not be optimum at choice points and may even be insufficient at
  times. I choose to err on the side of not optimum if I made a choice at all.

  If you use this grammar, I would appreciate hearing from you. I will try to maintain
  this grammar to the best of my ability, but at this point in time, this is only a side
  hobby (unless someone wants to pay me for doing JavaCC work!). In that regards, I am
  interested in hearing bugs and comments.

  TODO:

    Insert the appropriate code to enable C source trees from this grammar.

=============================================
3/2/06: Modified by Tom Copeland
- STRING_LITERAL now handles embedded escaped newlines, thanks to J.Chris Findlay for the patch
- Works with JavaCC 4.0
- Preprocessor directives are now simply SKIP'd, so no need to run C files through GCC first

  */

PARSER_BEGIN(CParser)

import java.util.*;

  public class CParser{

    // Hastable for storing typedef types
    private static Set types = new HashSet();

    // Stack for determining when the parser
    // is parsing a typdef definition.
    private static Stack typedefParsingStack = new Stack();

    // Returns true if the given string is
    // a typedef type.
    private static boolean isType(String type){
   	  return types.contains(type);
    }

    // Add a typedef type to those already defined
    private static void addType(String type){
   	  types.add(type);
    }

    // Prints out all the types used in parsing the c source
    private static void printTypes(){
      for (Iterator i = types.iterator(); i.hasNext();) {
        System.out.println(i.next());
      }
    }

    // Run the parser
    public static void main ( String args [ ] ) {
      CParser parser ;

  	  // Hack to include type "special types"
	    types.add("__signed__");
	    types.add("__const");
	    types.add("__inline__");
	    types.add("__signed");

      if(args.length == 0){
        System.out.println("C Parser Version 0.1Alpha:  Reading from standard input . . .");
        parser = new CParser(System.in);
      }
      else if(args.length == 1){
        System.out.println("C Parser Version 0.1Alpha:  Reading from file " + args[0] + " . . ." );
      try {
        parser = new CParser(new java.io.FileInputStream(args[0]));
      }
      catch(java.io.FileNotFoundException e){
        System.out.println("C Parser Version 0.1:  File " + args[0] + " not found.");
        return ;
        }
      }
      else {
        System.out.println("C Parser Version 0.1Alpha:  Usage is one of:");
        System.out.println("         java CParser < inputfile");
        System.out.println("OR");
        System.out.println("         java CParser inputfile");
        return ;
      }
      try {
        parser.TranslationUnit();
        System.out.println("C Parser Version 0.1Alpha:  Java program parsed successfully.");
      }
      catch(ParseException e){
        System.out.println("C Parser Version 0.1Alpha:  Encountered errors during parse.");
        e.printStackTrace();
      }
    }
  }

PARSER_END(CParser)

SKIP : {
 " "
|  "\t"
|  "\n"
|  "\r"
|  <"//" (~["\n","\r"])* ("\n" | "\r" | "\r\n")>
|  <"/*" (~["*"])* "*" ("*" | ~["*","/"] (~["*"])* "*")* "/">
| "#" : PREPROCESSOR_OUTPUT
}

<PREPROCESSOR_OUTPUT> SKIP:
{
     "\n" : DEFAULT
}

<PREPROCESSOR_OUTPUT> MORE:
{
 "\\\n"
 |
 "\\\r\n"
 |
 < ~[] >
}


TOKEN : {
 <INTEGER_LITERAL: <DECIMAL_LITERAL> (["l","L"])? | <HEX_LITERAL> (["l","L"])? | <OCTAL_LITERAL> (["l","L"])?>
|  <#DECIMAL_LITERAL: ["1"-"9"] (["0"-"9"])*>
|  <#HEX_LITERAL: "0" ["x","X"] (["0"-"9","a"-"f","A"-"F"])+>
|  <#OCTAL_LITERAL: "0" (["0"-"7"])*>
|  <FLOATING_POINT_LITERAL: (["0"-"9"])+ "." (["0"-"9"])* (<EXPONENT>)? (["f","F","d","D"])? | "." (["0"-"9"])+ (<EXPONENT>)? (["f","F","d","D"])? | (["0"-"9"])+ <EXPONENT> (["f","F","d","D"])? | (["0"-"9"])+ (<EXPONENT>)? ["f","F","d","D"]>
|  <#EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+>
|  <CHARACTER_LITERAL: "\'" (~["\'","\\","\n","\r"] | "\\" (["n","t","b","r","f","\\","\'","\""] | ["0"-"7"] (["0"-"7"])? | ["0"-"3"] ["0"-"7"] ["0"-"7"])) "\'">
|  <STRING_LITERAL: "\"" ( ~["\"","\\","\n","\r"] | "\\" ( ["n","t","b","r","f","\\","\'","\""] | ["0"-"7"] (["0"-"7"])?  | ["0"-"3"] ["0"-"7"] ["0"-"7"] | ( ["\n","\r"] | "\r\n")))* "\"">
}

TOKEN : {
	<CONTINUE: "continue"> |
	<VOLATILE: "volatile"> |
	<REGISTER: "register"> |
	<UNSIGNED: "unsigned"> |
	<TYPEDEF: "typedef"> |
	<DFLT: "default"> |
	<DOUBLE: "double"> |
	<SIZEOF: "sizeof"> |
	<SWITCH: "switch"> |
	<RETURN: "return"> |
	<EXTERN: "extern"> |
	<STRUCT: "struct"> |
	<STATIC: "static"> |
	<SIGNED: "signed"> |
	<WHILE: "while"> |
	<BREAK: "break"> |
	<UNION: "union"> |
	<CONST: "const"> |
	<FLOAT: "float"> |
	<SHORT: "short"> |
	<ELSE: "else"> |
	<CASE: "case"> |
	<LONG: "long"> |
	<ENUM: "enum"> |
	<AUTO: "auto"> |
	<VOID: "void"> |
	<CHAR: "char"> |
	<GOTO: "goto"> |
	<FOR: "for"> |
	<INT: "int"> |
	<IF: "if"> |
	<DO: "do">
}

TOKEN : {
 <IDENTIFIER: <LETTER> (<LETTER> | <DIGIT>)*>
|  <#LETTER: ["$","A"-"Z","_","a"-"z"]>
|  <#DIGIT: ["0"-"9"]>
}

void TranslationUnit() : {}
{
	(ExternalDeclaration())+
	{printTypes();}
}

void ExternalDeclaration() : {}
{
	( LOOKAHEAD( FunctionDefinition() ) FunctionDefinition() | Declaration())
}

void FunctionDefinition() : {}
{
	[LOOKAHEAD(DeclarationSpecifiers()) DeclarationSpecifiers()] Declarator() [ DeclarationList() ]
	CompoundStatement()
}

void Declaration() : {}
{
	DeclarationSpecifiers() [ InitDeclaratorList() ] ";"
}

void DeclarationList() : {}
{
	( LOOKAHEAD(Declaration()) Declaration() )+
}

void DeclarationSpecifiers() : {}
{
	  StorageClassSpecifier() [ LOOKAHEAD(DeclarationSpecifiers())
	        DeclarationSpecifiers() ] |
	  TypeSpecifier()  [ LOOKAHEAD(DeclarationSpecifiers())
	        DeclarationSpecifiers() ] |
	  TypeQualifier() [ LOOKAHEAD(DeclarationSpecifiers())
	        DeclarationSpecifiers() ]
}

void StorageClassSpecifier() : {}
{
	( <AUTO> | <REGISTER> | <STATIC> | <EXTERN> | <TYPEDEF>
	{
		typedefParsingStack.push(Boolean.TRUE);
	} )
}

void TypeSpecifier() : {}
{
	( <VOID> | <CHAR> | <SHORT> | <INT> | <LONG> | <FLOAT> | <DOUBLE> | <SIGNED> |
	  <UNSIGNED> | StructOrUnionSpecifier() | EnumSpecifier() | LOOKAHEAD( { isType(getToken(1).image) } )TypedefName() )
}

void TypeQualifier() : {}
{
	( <CONST> | <VOLATILE> )
}

void StructOrUnionSpecifier() : {}
{
	{
			typedefParsingStack.push(Boolean.FALSE);
	}

	StructOrUnion() ( LOOKAHEAD(3) [ <IDENTIFIER> ] "{" StructDeclarationList() "}" | <IDENTIFIER> )

	{
		typedefParsingStack.pop();
	}
}

void StructOrUnion() : {}
{
	( <STRUCT> | <UNION> )
}

void StructDeclarationList() : {}
{
	(StructDeclaration())+
}

void InitDeclaratorList() : {}
{
	InitDeclarator() ("," InitDeclarator())*
	{
		// Finished with a typedefDeclaration??
		if(!(typedefParsingStack.empty()) && ((Boolean)typedefParsingStack.peek()).booleanValue()){
		   	typedefParsingStack.pop();
	  }
	}
}

void InitDeclarator() : {}
{
	Declarator() [ "=" Initializer() ]
}

void StructDeclaration() : {}
{
	SpecifierQualifierList() StructDeclaratorList() ";"
}

void SpecifierQualifierList() : {}
{
	  TypeSpecifier() [ LOOKAHEAD(SpecifierQualifierList())
	        SpecifierQualifierList() ]|
	  TypeQualifier() [ LOOKAHEAD(SpecifierQualifierList())
	        SpecifierQualifierList() ]
}

void StructDeclaratorList() : {}
{
	StructDeclarator() ( "," StructDeclarator() )*
}

void StructDeclarator() : {}
{
	( LOOKAHEAD(3) Declarator() | [ Declarator() ] ":" ConstantExpression() )
}

void EnumSpecifier() : {}
{
	<ENUM> ( LOOKAHEAD(3) [ <IDENTIFIER> ] "{" EnumeratorList() "}" | <IDENTIFIER> )
}

void EnumeratorList() : {}
{
	Enumerator() ("," Enumerator())*
}

void Enumerator() : {}
{
	<IDENTIFIER> [ "=" ConstantExpression() ]
}

void Declarator() : {}
{
	[ Pointer() ] DirectDeclarator()
}

void DirectDeclarator() : { Token t;}
{
	( t = <IDENTIFIER>

	{ if(!(typedefParsingStack.empty()) && ((Boolean)typedefParsingStack.peek()).booleanValue()){
				addType(t.image);
	  }
	}
	 | "(" Declarator() ")" )
  
  { typedefParsingStack.push( Boolean.FALSE ); }

	( "[" [ ConstantExpression() ] "]" |
	    LOOKAHEAD(3) "(" ParameterTypeList() ")" |
	    "(" [ IdentifierList() ] ")" )*
      { typedefParsingStack.pop(); }
}

void Pointer() : {}
{
	"*" [ TypeQualifierList() ] [ Pointer() ]
}

void TypeQualifierList() : {}
{
	(TypeQualifier())+
}

void ParameterTypeList() : {}
{
	ParameterList() ["," "..." ]
}

void ParameterList() : {}
{
	ParameterDeclaration() (LOOKAHEAD(2) "," ParameterDeclaration())*
}

void ParameterDeclaration() : {}
{
	DeclarationSpecifiers() ( LOOKAHEAD(Declarator()) Declarator() | [ AbstractDeclarator() ] )
}

void IdentifierList() : {}
{
	<IDENTIFIER> ("," <IDENTIFIER>)*
}

void Initializer() : {}
{
	( AssignmentExpression() |
	  "{" InitializerList() [","] "}" )
}

void InitializerList() : {}
{
	Initializer() (LOOKAHEAD(2) "," Initializer())*
}

void TypeName() : {}
{
	SpecifierQualifierList() [ AbstractDeclarator() ]

}

void AbstractDeclarator() : {}
{
	( LOOKAHEAD(3) Pointer() |
	  [Pointer()] DirectAbstractDeclarator() )
}

void DirectAbstractDeclarator() : {}
{
	( LOOKAHEAD(2) "(" AbstractDeclarator() ")" |
	               "[" [ConstantExpression()] "]" |
	               "(" [ParameterTypeList()] ")" )

	  ( "[" [ ConstantExpression() ] "]" | "(" [ ParameterTypeList() ] ")" )*
}

void TypedefName() : {}
{
	<IDENTIFIER>
}

void Statement() : {}
{
	( LOOKAHEAD(2) LabeledStatement() |
	  ExpressionStatement() |
	  CompoundStatement() |
	  SelectionStatement() |
	  IterationStatement() |
	  JumpStatement() )
}

void LabeledStatement() : {}
{
	( <IDENTIFIER> ":" Statement() |
	  <CASE> ConstantExpression() ":" Statement() |
	  <DFLT> ":" Statement() )
}

void ExpressionStatement() : {}
{
	[ Expression() ] ";"
}

void CompoundStatement() : {}
{
	"{" [ LOOKAHEAD(DeclarationList()) DeclarationList() ]
	    [ StatementList() ]
	"}"
}

void StatementList() : {}
{
	(Statement())+
}

void SelectionStatement() : {}
{
	( <IF> "(" Expression() ")" Statement() [ LOOKAHEAD(2) <ELSE> Statement() ] |
	  <SWITCH> "(" Expression() ")" Statement() )
}

void IterationStatement() : {}
{
	( <WHILE> "(" Expression() ")" Statement() |
	  <DO> Statement() <WHILE> "(" Expression() ")" ";" |
	  <FOR> "(" [ Expression() ] ";" [ Expression() ] ";" [ Expression() ] ")" Statement() )
}

void JumpStatement() : {}
{
	( <GOTO> <IDENTIFIER> ";" |
	  <CONTINUE> ";" |
	  <BREAK> ";" |
	  <RETURN> [ Expression() ] ";" )
}

void Expression() : {}
{
	AssignmentExpression() ( "," AssignmentExpression() )*
}

void AssignmentExpression() : {}
{
	  LOOKAHEAD(UnaryExpression() AssignmentOperator()) UnaryExpression() AssignmentOperator() AssignmentExpression() |
	  LOOKAHEAD(3) ConditionalExpression()
}

void AssignmentOperator() : {}
{
	( "=" | "*=" | "/=" | "%=" | "+=" | "-=" | "<<=" | ">>=" | "&=" | "^=" | "|=" )
}

void ConditionalExpression() : {}
{
	LogicalORExpression() [ "?" Expression() ":" ConditionalExpression() ]
}

void ConstantExpression() : {}
{
	ConditionalExpression()
}

void LogicalORExpression() : {}
{
	LogicalANDExpression() [ "||" LogicalORExpression() ]
}

void LogicalANDExpression() : {}
{
	InclusiveORExpression() [ "&&" LogicalANDExpression() ]
}

void InclusiveORExpression() : {}
{
	ExclusiveORExpression() [ "|" InclusiveORExpression() ]
}

void ExclusiveORExpression() : {}
{
	ANDExpression() [ "^" ExclusiveORExpression() ]
}

void ANDExpression() : {}
{
	EqualityExpression() [ "&" ANDExpression() ]
}

void EqualityExpression() : {}
{
	RelationalExpression() [ ( "==" | "!=" ) EqualityExpression() ]
}

void RelationalExpression() : {}
{
	ShiftExpression() [ ( "<" | ">" | "<=" | ">=" ) RelationalExpression() ]
}

void ShiftExpression() : {}
{
	AdditiveExpression() [ ( "<<" | ">>" ) ShiftExpression() ]
}

void AdditiveExpression() : {}
{
	MultiplicativeExpression() [ ( "+" | "-" ) AdditiveExpression() ]
}

void MultiplicativeExpression() : {}
{
	CastExpression() [ ( "*" | "/" | "%" ) MultiplicativeExpression() ]
}

void CastExpression() : {}
{
	( LOOKAHEAD("(" TypeName() ")" CastExpression() ) "(" TypeName() ")" CastExpression() |
	  UnaryExpression() )
}

void UnaryExpression() : {}
{
	( LOOKAHEAD(3) PostfixExpression() |
	  "++" UnaryExpression() |
	  "--" UnaryExpression() |
	  UnaryOperator() CastExpression() |
	  <SIZEOF> ( LOOKAHEAD(UnaryExpression() ) UnaryExpression() | "(" TypeName() ")" ) )
}

void UnaryOperator() : {}
{
	( "&" | "*" | "+" | "-" | "~" | "!" )
}

void PostfixExpression() : {}
{
	PrimaryExpression() ( "[" Expression() "]" |
	                      "(" [ LOOKAHEAD(ArgumentExpressionList() ) ArgumentExpressionList() ] ")" |
	  					  "." <IDENTIFIER> |
	  					  "->" <IDENTIFIER> |
	  					  "++" |
	  					  "--" )*
}

void PrimaryExpression() : {}
{
	( <IDENTIFIER> |
	  Constant() |
	  "(" Expression() ")" )
}

void ArgumentExpressionList() : {}
{
	AssignmentExpression() ( "," AssignmentExpression() )*
}

void Constant() : {}
{
 <INTEGER_LITERAL> | <FLOATING_POINT_LITERAL> | <CHARACTER_LITERAL> | <STRING_LITERAL>
}
  • 4
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值