使用flex和bison创建过滤器

假设我们有一个类具有一些属性。这个类有很多实例。给定一个过滤的规则,挑选符合规则的实例。

假设规则使用类似C语言的语句进行描述。

例子:

属性列表

width     int

height    int

fps         float

os           string

一种可能的过滤器:

width > 640 || height < 450


1. 构造词法器

filter.l

%{
/* Definitions section: everything between %{ and %} is copied verbatim into
 * the generated scanner source. */
#include <stdio.h>
#include <stdlib.h>

// Redirect the scanner's input: YY_INPUT is redefined so that the scanner
// fetches its characters through readInputForLexer() (declared here, defined
// outside this file) instead of using the definition flex supplies.
// NOTE(review): `r` is handed over as `&r` to a size_t* parameter; whether the
// variable flex substitutes for `r` really is a size_t depends on the flex
// version -- confirm against the generated scanner.
extern int readInputForLexer(char* buffer, size_t *numBytesRead, int maxBytesToRead);
#undef YY_INPUT
#define YY_INPUT(b,r,s) readInputForLexer(b, &r, s)

// Token codes and the yylval/YYSTYPE declarations used by the rules below
// (presumably the header produced by `bison -d` -- confirm with the build).
#include "filter.tab.h"

void yyerror(char *); // must be declared here: the rule actions call it

// Inside a rule action:
//   yytext holds the text of the current match (a string);
//   the action may store a value in yylval (of type YYSTYPE) and returns the
//   token type to the parser.

%}


%%
    /*
     * Token rules for the filter language.
     *   - whitespace is skipped;
     *   - INTEGER / FLOAT / ID / STRING carry their value in yylval;
     *   - comparison and logical operators are returned as named tokens;
     *   - arithmetic characters and parentheses are returned as themselves;
     *   - any other character is reported through yyerror() and skipped.
     * The strings stored in yylval.sVal are allocated with strdup(); the
     * receiver of the token is responsible for free()ing them.
     */
[\t\n ]                 ;
[0-9]+                  { yylval.iVal = atoi(yytext); return INTEGER; }
[0-9]*\.[0-9]+          { yylval.fVal = atof(yytext); return FLOAT;   }
[a-zA-Z/][a-zA-Z/0-9]*  { yylval.sVal = strdup(yytext); return ID; }
[\+\-\*\/\%]            { return *yytext; }
==                      { return EQ; }
\>                      { return GT; }
\<                      { return LT; }
\>=                     { return GE; }
\<=                     { return LE; }
!=                      { return NEQ; }
\|\|                    { return OR; }
\&\&                    { return AND; }
\(                      { return '('; }
\)                      { return ')'; }
\"(\\.|[^\\"])*\"       {
                            /* The match includes both delimiting quotes
                             * (so yyleng >= 2). Strip them so the token value
                             * is the string's content; otherwise a literal
                             * such as "Android" could never compare equal to
                             * a variable holding Android. Shortening yytext
                             * in place is permitted by flex. Backslash escapes
                             * inside the literal are kept as written. */
                            yytext[yyleng - 1] = '\0';
                            yylval.sVal = strdup(yytext + 1);
                            return STRING;
                        }
.                       { yyerror("Bad character"); }
%%

/*
 * End-of-input hook called by the scanner.
 *
 * A scanner that reads several sources in sequence would switch to the next
 * one here and return 0. This one has a single input, so it always returns 1,
 * which makes scanning stop at EOF.
 * Background: https://www.ibm.com/developerworks/cn/linux/sdk/lex/
 */
int yywrap(void)
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}

2. 创建yacc文件

filter.y

%{
/* Created by Zhenyong Chen, 2017/07/30
 *
 * Demo: build a tree-structured filter.
 *     An object A has a list of properties.
 *     A filter expression describes a set of rules over those properties.
 * The resulting tree is later evaluated to decide whether A qualifies.
 *
 * This prologue is copied verbatim into the generated parser source.
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <memory.h>
#include "syntax.h"

// NOTE(review): YYLEX_PARAM is undefined here so that yylex() is called
// without extra arguments -- presumably; confirm against the bison version used.
#undef YYLEX_PARAM
extern int yylex();   // the scanner entry point, defined outside this file
void yyerror(char *); // error reporter, defined in the epilogue of this file

// Root of the tree produced by the most recent parse; assigned by the
// `program` rule actions and handed to the caller by parseProgram().
static Operand *_outputTree = NULL;

%}

// Token declaration syntax:
// http://www.gnu.org/software/bison/manual/html_node/Token-Decl.html

// Replace the default YYSTYPE (the type of yylval used by the scanner) with a
// union: one member per kind of semantic value.
%union {
    float fVal;
    int   iVal;
    char *sVal;
    struct Operand *synNode;
}
// Tokens that carry a value name the union member they use in <...>.
// The quoted string after a token is its literal alias, usable in the rules.
%token <iVal> INTEGER "int"
%token <fVal> FLOAT "float"
%token <sVal> ID
%token EQ  "=="
%token NEQ "!="
%token GT  ">"
%token LT  "<"
%token GE  ">="
%token LE  "<="
%token OR  "||"
%token AND "&&"
%token <sVal> STRING

// Precedence/associativity for the arithmetic operator characters. No rule in
// this grammar uses them yet.
%left '+' '-'
%left '*' '/' '%'

// Every nonterminal yields a syntax-tree node (union member synNode).
%type<synNode> program
%type<synNode> expr
%type<synNode> AndExpr
%type<synNode> OrExpr
%%

// In the actions, $$ is the value of the rule being reduced and $1, $2, ... are
// the values of its components; all are of type YYSTYPE
// (http://pubs.opengroup.org/onlinepubs/7908799/xcu/yacc.html).
//
// Grammar layering: program -> OrExpr -> AndExpr -> expr, so "&&" binds tighter
// than "||", and a parenthesised OrExpr can be used wherever an expr can.

// Start symbol: the whole filter. Publishes the finished tree in _outputTree.
program:
    OrExpr                  { $$ = $1; _outputTree = $$;}
    |                       { $$ = NULL; _outputTree = $$; } // could be empty program
    ;

// A single comparison `ID <op> literal`, or a parenthesised sub-expression.
// Each comparison becomes a non-terminal node whose first operand is a
// variable node and whose second operand is a value node.
// The ID/STRING texts ($1, $3) are malloc'ed by the scanner (strdup); the node
// factories store them in std::string members, so they are freed here.
// == and != accept STRING, INTEGER and FLOAT literals; the ordering operators
// accept INTEGER and FLOAT only.
expr:
    ID "==" STRING          { $$ = Operand::CreateNonTermNode(Operand::OP_EQ,
                                        Operand::CreateVarNode($1),
                                        Operand::CreateValueNode($3)); free($1); free($3); }
    | ID "!=" STRING        { $$ = Operand::CreateNonTermNode(Operand::OP_NE,
                                        Operand::CreateVarNode($1),
                                        Operand::CreateValueNode($3)); free($1); free($3); }
    | ID "==" INTEGER       { $$ = Operand::CreateNonTermNode(Operand::OP_EQ,
                                        Operand::CreateVarNode($1),
                                        Operand::CreateValueNode($3)); free($1); }
    | ID "!=" INTEGER       { $$ = Operand::CreateNonTermNode(Operand::OP_NE,
                                        Operand::CreateVarNode($1),
                                        Operand::CreateValueNode($3)); free($1); }
    | ID "==" FLOAT         { $$ = Operand::CreateNonTermNode(Operand::OP_EQ,
                                        Operand::CreateVarNode($1),
                                        Operand::CreateValueNode($3)); free($1); }
    | ID "!=" FLOAT         { $$ = Operand::CreateNonTermNode(Operand::OP_NE,
                                        Operand::CreateVarNode($1),
                                        Operand::CreateValueNode($3)); free($1); }
    | ID ">" INTEGER        { $$ = Operand::CreateNonTermNode(Operand::OP_GT,
                                        Operand::CreateVarNode($1),
                                        Operand::CreateValueNode($3)); free($1); }
    | ID ">" FLOAT          { $$ = Operand::CreateNonTermNode(Operand::OP_GT,
                                        Operand::CreateVarNode($1),
                                        Operand::CreateValueNode($3)); free($1); }
    | ID "<" INTEGER        { $$ = Operand::CreateNonTermNode(Operand::OP_LT,
                                        Operand::CreateVarNode($1),
                                        Operand::CreateValueNode($3)); free($1); }
    | ID "<" FLOAT          { $$ = Operand::CreateNonTermNode(Operand::OP_LT,
                                        Operand::CreateVarNode($1),
                                        Operand::CreateValueNode($3)); free($1); }
    | ID ">=" INTEGER       { $$ = Operand::CreateNonTermNode(Operand::OP_GE,
                                        Operand::CreateVarNode($1),
                                        Operand::CreateValueNode($3)); free($1); }
    | ID ">=" FLOAT         { $$ = Operand::CreateNonTermNode(Operand::OP_GE,
                                        Operand::CreateVarNode($1),
                                        Operand::CreateValueNode($3)); free($1); }
    | ID "<=" INTEGER       { $$ = Operand::CreateNonTermNode(Operand::OP_LE,
                                        Operand::CreateVarNode($1),
                                        Operand::CreateValueNode($3)); free($1); }
    | ID "<=" FLOAT         { $$ = Operand::CreateNonTermNode(Operand::OP_LE,
                                        Operand::CreateVarNode($1),
                                        Operand::CreateValueNode($3)); free($1); }
    | '(' OrExpr ')'        { $$ = $2; }
    ;

// One or more expr joined by "&&" (written with the token name AND here);
// left-recursive, so a chain nests to the left.
AndExpr:
    expr                    { $$ = $1; }
    | AndExpr AND expr      { $$ = Operand::CreateNonTermNode(Operand::OP_AND, $1, $3); }
    ;

// One or more AndExpr joined by "||"; left-recursive as well.
OrExpr:
    AndExpr                 { $$ = $1; }
    | OrExpr "||" AndExpr   { $$ = Operand::CreateNonTermNode(Operand::OP_OR, $1, $3); }
    ;

%%

/*
 * Error callback shared by the parser and the scanner.
 * Writes "Compile error: <message>" followed by a newline to standard output.
 */
void yyerror(char *s)
{
    fprintf(stdout, "Compile error: %s\n", s);
}


/* Text currently being parsed (NUL-terminated), or NULL when no parse is active. */
static char *_buf = NULL;
/* Number of bytes of _buf already handed to the scanner. */
static int _readOffset = 0;

/*
 * Input callback for the scanner (wired in through YY_INPUT).
 *
 * Copies up to maxBytesToRead bytes of the not-yet-consumed part of _buf into
 * buffer and advances the read position.
 *
 *   buffer          destination; must have room for maxBytesToRead bytes
 *   numBytesRead    out: number of bytes stored in buffer (0 at end of input)
 *   maxBytesToRead  capacity of buffer; values <= 0 copy nothing
 *
 * Returns 0 when the call completed normally (possibly copying 0 bytes) and
 * -1 when there is no input left or no input is installed at all.
 */
int readInputForLexer(char* buffer, size_t *numBytesRead, int maxBytesToRead)
{
    int bytesRemaining;
    int numBytesToRead;

    /* No active input: report end of input instead of calling strlen(NULL). */
    if (_buf == NULL) {
        *numBytesRead = 0;
        return -1;
    }

    bytesRemaining = (int)strlen(_buf) - _readOffset;
    if (bytesRemaining <= 0) {
        *numBytesRead = 0;
        return -1;
    }

    numBytesToRead = maxBytesToRead;
    if (numBytesToRead > bytesRemaining) {
        numBytesToRead = bytesRemaining;
    }
    /* A negative count must never reach memcpy (it would convert to a huge size_t). */
    if (numBytesToRead < 0) {
        numBytesToRead = 0;
    }

    memcpy(buffer, _buf + _readOffset, (size_t)numBytesToRead);

    *numBytesRead = (size_t)numBytesToRead;
    _readOffset += numBytesToRead;

    return 0;
}

/*
 * Parses a filter expression and returns the root of its syntax tree.
 *
 *   text  NUL-terminated filter source; may be NULL
 *
 * Returns NULL when text is NULL, when memory cannot be allocated, when the
 * program is empty, or when the parser reports an error. Otherwise the caller
 * owns the returned tree and must `delete` it.
 *
 * NOTE(review): the scanner's own buffering is not reset here; after a failed
 * parse, leftover scanner state may affect the next call -- confirm whether a
 * restart of the scanner is needed.
 */
Operand *parseProgram(const char *text)
{
    if(text == NULL)
        return NULL;

    // Private copy of the input for readInputForLexer() to serve from.
    size_t length = strlen(text);
    _buf = (char *)malloc(length + 1);
    if(_buf == NULL)
        return NULL;
    memcpy(_buf, text, length + 1);
    _readOffset = 0;
    _outputTree = NULL;

    int status = yyparse();

    free(_buf);
    _buf = NULL;

    Operand *ret = _outputTree;
    _outputTree = NULL;

    // The parser may already have run the `program` action (setting
    // _outputTree) before it detects trailing garbage. A non-zero status means
    // the input was rejected, so do not hand out that tree.
    if(status != 0) {
        delete ret;
        ret = NULL;
    }
    return ret;
}

3. 语法树

syntax.h

#if !defined(__SYNTAX_H__)
#define __SYNTAX_H__

#include <string>

/**
 * One node of the filter's syntax tree.
 *
 * A node is either a non-terminal (an operator with up to two child nodes) or
 * a leaf (an int, float or string literal, or a variable reference). eType
 * says which of the data members below is meaningful. Nodes are created only
 * through the static Create* factories; a non-terminal node deletes its
 * children in its destructor.
 */
class Operand {
public:
    /// Operator carried by a non-terminal node.
    enum OperatorType {
        OP_AND = 1,
        OP_OR,
        OP_EQ,
        OP_NE,
        OP_GT,
        OP_LT,
        OP_GE,
        OP_LE
    };

    /// Kind of node.
    enum OperandType {
        OP_NONTERM = 0,
        OP_INT,
        OP_FLOAT,
        OP_STRING,
        OP_VAR
    };

    OperandType eType;

    // Used when eType == OP_NONTERM: the operator and its operands.
    OperatorType eOperator;
    Operand *aOperands[2];

    // Used when eType == OP_VAR: the variable's name.
    std::string varName;

    // Used by the literal kinds: the value matching eType.
    int iVal;
    float fVal;
    std::string sVal;

    /// Leaf factories: int / float / string literal, and variable reference.
    static Operand *CreateValueNode(int ival);
    static Operand *CreateValueNode(float fval);
    static Operand *CreateValueNode(const char *sval);
    static Operand *CreateVarNode(const char *name);

    /// Non-terminal factory: an operator applied to up to two operands.
    static Operand *CreateNonTermNode(OperatorType opType, Operand *operand1 = NULL, Operand *operand2 = NULL);

    virtual ~Operand();

    /// Prints this node and its children to standard output, indented by `margin` spaces.
    void Dump(int margin);

private:
    // Construction goes through the Create* factories only.
    Operand();
};

#endif /* __SYNTAX_H__ */

syntax.cpp

#include "syntax.h"

#include <stdio.h>

// Creates an int leaf holding 0 with every member in a defined state.
// The factories overwrite the members relevant to the node they build; the
// remaining ones keep these defaults instead of indeterminate values, so the
// destructor and Dump() are safe whatever eType is set to afterwards.
Operand::Operand()
    : eType(OP_INT),
      eOperator(OP_AND),   // placeholder; only meaningful for OP_NONTERM nodes
      iVal(0),
      fVal(0.0f)
{
    aOperands[0] = NULL;
    aOperands[1] = NULL;
}

// Destroys the node. A non-terminal node owns its operands and deletes them
// (recursively); leaf nodes have nothing to release here.
Operand::~Operand()
{
    if(eType != OP_NONTERM)
        return;

    Operand **const end = aOperands + sizeof(aOperands) / sizeof(aOperands[0]);
    for(Operand **slot = aOperands; slot != end; ++slot) {
        delete *slot;   // deleting a null pointer is a no-op
        *slot = NULL;
    }
}

// Builds a leaf node holding the integer literal `ival`.
// The node is allocated with new; the caller (or the parent node it is
// attached to) is responsible for deleting it.
Operand *Operand::CreateValueNode(int ival)
{
    Operand *node = new Operand();

    node->iVal = ival;
    node->eType = OP_INT;
    return node;
}

// Builds a leaf node holding the floating-point literal `fval`.
// The node is allocated with new; the caller (or the parent node it is
// attached to) is responsible for deleting it.
Operand *Operand::CreateValueNode(float fval)
{
    Operand *node = new Operand();

    node->fVal = fval;
    node->eType = OP_FLOAT;
    return node;
}

// Builds a leaf node holding a copy of the string literal `sval`.
// `sval` is copied, so the caller keeps ownership of its buffer. A NULL
// `sval` (e.g. a failed strdup upstream) is stored as the empty string,
// because assigning a null pointer to std::string is undefined behaviour.
// The node is allocated with new; the caller (or the parent node it is
// attached to) is responsible for deleting it.
Operand *Operand::CreateValueNode(const char *sval)
{
    Operand *op = new Operand();
    op->eType = OP_STRING;
    op->sVal = (sval != NULL) ? sval : "";

    return op;
}

// Builds a leaf node that refers to the variable called `name`.
// `name` is copied, so the caller keeps ownership of its buffer. A NULL
// `name` (e.g. a failed strdup upstream) is stored as the empty name,
// because assigning a null pointer to std::string is undefined behaviour.
// The node is allocated with new; the caller (or the parent node it is
// attached to) is responsible for deleting it.
Operand *Operand::CreateVarNode(const char *name)
{
    Operand *op = new Operand();
    op->eType = OP_VAR;
    op->varName = (name != NULL) ? name : "";

    return op;
}

// Builds a non-terminal node applying `opType` to `operand1` and `operand2`.
// Either operand may be NULL (the declaration defaults both to NULL). The new
// node takes ownership of the operands: its destructor deletes them.
Operand *Operand::CreateNonTermNode(enum OperatorType opType,
                                    Operand *operand1/* = NULL*/,
                                    Operand *operand2/* = NULL*/)
{
    Operand *node = new Operand();

    node->aOperands[0] = operand1;
    node->aOperands[1] = operand2;
    node->eOperator = opType;
    node->eType = OP_NONTERM;
    return node;
}

/* Writes `margin` spaces to standard output; nothing when margin <= 0. */
static void print_margin(int margin)
{
    int printed;

    for(printed = 0; printed < margin; ++printed) {
        printf(" ");
    }
}

// Returns the source spelling of operator `t` (e.g. "&&" for OP_AND), or
// "ERROR" when `t` is not one of the known operators. The returned string is
// a literal and must not be freed.
const char *operatorTypeToString(enum Operand::OperatorType t)
{
    static const struct {
        Operand::OperatorType op;
        const char *spelling;
    } kSpellings[] = {
        { Operand::OP_AND, "&&" },
        { Operand::OP_OR,  "||" },
        { Operand::OP_EQ,  "==" },
        { Operand::OP_NE,  "!=" },
        { Operand::OP_GT,  ">"  },
        { Operand::OP_LT,  "<"  },
        { Operand::OP_GE,  ">=" },
        { Operand::OP_LE,  "<=" },
    };
    const unsigned count = sizeof(kSpellings) / sizeof(kSpellings[0]);

    for(unsigned i = 0; i < count; ++i) {
        if(kSpellings[i].op == t)
            return kSpellings[i].spelling;
    }
    return "ERROR";
}

void Operand::Dump(int margin)
{
    if(eType == OP_NONTERM) {
        print_margin(margin);
        // print operator
        printf("%s {\n", operatorTypeToString(eOperator));
        for(int i=0; i<sizeof(aOperands)/sizeof(aOperands[0]); i++) {
            if(aOperands[i] != NULL) {
                aOperands[i]->Dump(margin + 4);
            }
        }
        print_margin(margin);
        printf("}\n");
    }
    else
    {
        print_margin(margin);
        switch(eType) {
        case OP_INT:
            printf("%d\n", iVal);
            break;
        case OP_FLOAT:
            printf("%f\n", fVal);
            break;
        case OP_STRING:
            printf("%s\n", sVal.c_str());
            break;
        case OP_VAR:
            printf("%s\n", varName.c_str());
            break;
        default:
            break;
        }
    }

}

4. 使用方法

main.cpp

#include <stdio.h>
#include "syntax.h"
#include <string>
#include <list>

extern Operand *parseProgram(const char *text);

// Result of evaluating a syntax-tree node, also used to hold variable values.
// `type` selects which of the value members is meaningful:
//   0: void; 1: int; 2: float; 3: std::string; 4: bool
struct compoundValue {
    int type;

    int iVal;
    float fVal;
    std::string sVal;
    bool bVal;

    // Start as a well-defined "void" value. Without this, a default-constructed
    // value has indeterminate type/iVal/fVal/bVal, and reading or copying them
    // (e.g. after a comparison the evaluator cannot perform) is undefined.
    compoundValue() : type(0), iVal(0), fVal(0.0f), bVal(false) {}
};

// Prints `v` to standard output as "(<type>)<value>" followed by a newline.
// Values whose type is not int, float, string or bool print nothing.
void printValue(struct compoundValue &v)
{
    switch(v.type) {
    case 1:
        printf("(int)%d\n", v.iVal);
        break;
    case 2:
        printf("(float)%f\n", v.fVal);
        break;
    case 3:
        printf("(string)%s\n", v.sVal.c_str());
        break;
    case 4: {
        const char *text = v.bVal ? "true" : "false";
        printf("(bool)%s\n", text);
        break;
    }
    default:
        // should not reach here
        break;
    }
}

// define several variables
struct variable {
    std::string name;
    struct compoundValue value;
};

std::list<struct variable> gVariableList;

void initVarList()
{
    struct variable v;
    v.name = "width";
    v.value.type = 1; // int
    v.value.iVal = 640;
    gVariableList.push_back(v);
    
    v.name = "height";
    v.value.type = 1; // int
    v.value.iVal = 360;
    gVariableList.push_back(v);
    
    v.name = "fps";
    v.value.type = 2; // float
    v.value.fVal = 25.0;
    gVariableList.push_back(v);

    v.name = "os";
    v.value.type = 3; // std::string
    v.value.sVal = "Android";
    gVariableList.push_back(v);

    v.name = "enabled";
    v.value.type = 4; // bool
    v.value.bVal = true;
    gVariableList.push_back(v);
}

// Returns the type code (see compoundValue::type) of the first variable in
// gVariableList named `varName`, or -1 when no such variable exists.
int get_variable_type(std::string varName)
{
    std::list<struct variable>::iterator cur = gVariableList.begin();
    while(cur != gVariableList.end()) {
        if(cur->name == varName)
            return cur->value.type;
        ++cur;
    }
    return -1; // unknown variable
}

// Returns the int member of the first variable in gVariableList named
// `varName`, or 0 when no such variable exists. The variable's type is not
// checked.
int get_variable_value_i(std::string varName)
{
    std::list<struct variable>::iterator cur = gVariableList.begin();
    while(cur != gVariableList.end()) {
        if(cur->name == varName)
            return cur->value.iVal;
        ++cur;
    }
    return 0; // unknown variable
}

// Returns the float member of the first variable in gVariableList named
// `varName`, or 0.0f when no such variable exists. The variable's type is not
// checked.
float get_variable_value_f(std::string varName)
{
    std::list<struct variable>::iterator cur = gVariableList.begin();
    while(cur != gVariableList.end()) {
        if(cur->name == varName)
            return cur->value.fVal;
        ++cur;
    }
    return 0.0f; // unknown variable
}

// Returns the string member of the first variable in gVariableList named
// `varName`, or an empty string when no such variable exists. The variable's
// type is not checked.
std::string get_variable_value_s(std::string varName)
{
    std::list<struct variable>::iterator cur = gVariableList.begin();
    while(cur != gVariableList.end()) {
        if(cur->name == varName)
            return cur->value.sVal;
        ++cur;
    }
    return ""; // unknown variable
}

// Returns the bool member of the first variable in gVariableList named
// `varName`, or false when no such variable exists. The variable's type is
// not checked.
bool get_variable_value_b(std::string varName)
{
    std::list<struct variable>::iterator cur = gVariableList.begin();
    while(cur != gVariableList.end()) {
        if(cur->name == varName)
            return cur->value.bVal;
        ++cur;
    }
    return false; // unknown variable
}


struct compoundValue eval_syntax(Operand &op)
{
    struct compoundValue v;
    // traverse the tree
    if(op.eType == Operand::OP_INT) {
        v.type = 1;
        v.iVal = op.iVal;
    }
    else if(op.eType == Operand::OP_FLOAT) {
        v.type = 2;
        v.fVal = op.fVal;
    }
    else if(op.eType == Operand::OP_STRING) {
        v.type = 3;
        v.sVal = op.sVal;
    }
    else if(op.eType == Operand::OP_VAR) {
        v.type = get_variable_type(op.varName);
        if(v.type == 1) {
            v.iVal = get_variable_value_i(op.varName);
        }
        else if(v.type == 2) {
            v.fVal = get_variable_value_f(op.varName);
        }
        else if(v.type == 3) {
            v.sVal = get_variable_value_s(op.varName);
        }
        else if(v.type == 4) {
            v.bVal = get_variable_value_b(op.varName);
        }
    }
    else if(op.eType == Operand::OP_NONTERM) {
        if(op.eOperator == Operand::OP_AND) {
            struct compoundValue left, right;
            left = eval_syntax(*op.aOperands[0]);
            right = eval_syntax(*op.aOperands[1]);
            v.type = 4;
            v.bVal = left.bVal && right.bVal;
        }
        else if(op.eOperator == Operand::OP_OR) {
            struct compoundValue left, right;
            left = eval_syntax(*op.aOperands[0]);
            v.type = 4;
            if(left.bVal) {
                v.bVal = true;
            }
            else {
                right = eval_syntax(*op.aOperands[1]);
                v.bVal = right.bVal;
            }
        }
        else if(op.eOperator == Operand::OP_EQ) {
            struct compoundValue left, right;
            left = eval_syntax(*op.aOperands[0]);
            right = eval_syntax(*op.aOperands[1]);
            v.type = 4;
            if(left.type == 1)
                v.bVal = (left.iVal == right.iVal);
            else if(left.type == 2)
                v.bVal = (left.fVal == right.fVal);
            else if(left.type == 3)
                v.bVal = left.sVal.compare(right.sVal) == 0;
            else if(left.type == 4)
                v.bVal = (left.bVal == right.bVal);
        }
        else if(op.eOperator == Operand::OP_NE) {
            struct compoundValue left, right;
            left = eval_syntax(*op.aOperands[0]);
            right = eval_syntax(*op.aOperands[1]);
            v.type = 4;
            if(left.type == 1)
                v.bVal = (left.iVal != right.iVal);
            else if(left.type == 2)
                v.bVal = (left.fVal != right.fVal);
            else if(left.type == 3)
                v.bVal = left.sVal.compare(right.sVal) != 0;
            else if(left.type == 4)
                v.bVal = (left.bVal != right.bVal);
        }
        else if(op.eOperator == Operand::OP_GT) {
            struct compoundValue left, right;
            left = eval_syntax(*op.aOperands[0]);
            right = eval_syntax(*op.aOperands[1]);
            v.type = 4;
            if(left.type == 1)
                v.bVal = (left.iVal > right.iVal);
            else if(left.type == 2)
                v.bVal = (left.fVal > right.fVal);
        }
        else if(op.eOperator == Operand::OP_LT) {
            struct compoundValue left, right;
            left = eval_syntax(*op.aOperands[0]);
            right = eval_syntax(*op.aOperands[1]);
            v.type = 4;
            if(left.type == 1)
                v.bVal = (left.iVal < right.iVal);
            else if(left.type == 2)
                v.bVal = (left.fVal < right.fVal);
        }
        else if(op.eOperator == Operand::OP_GE) {
            struct compoundValue left, right;
            left = eval_syntax(*op.aOperands[0]);
            right = eval_syntax(*op.aOperands[1]);
            v.type = 4;
            if(left.type == 1)
                v.bVal = (left.iVal >= right.iVal);
            else if(left.type == 2)
                v.bVal = (left.fVal >= right.fVal);
        }
        else if(op.eOperator == Operand::OP_LE) {
            struct compoundValue left, right;
            left = eval_syntax(*op.aOperands[0]);
            right = eval_syntax(*op.aOperands[1]);
            v.type = 4;
            if(left.type == 1)
                v.bVal = (left.iVal <= right.iVal);
            else if(left.type == 2)
                v.bVal = (left.fVal <= right.fVal);
        }
    }
    
    return v;
}

int main(int argc, const char **agrv)
{
    initVarList();

    const char *s = "width > 640 || height < 450";
    printf("Input: \n%s\n", s);

    Operand *p = parseProgram(s);
    if(p != NULL) {
        p->Dump(0);
        struct compoundValue v = eval_syntax(*p);
        delete p;
        printf("Evaluated:\n");
        printValue(v);
    }
    return 0;
}

5. 编译


#!/bin/bash
# Builds the `filter` demo: generates the parser and the scanner, then compiles
# every .c and .cpp file in the current directory with g++.
# Exits with status 1 as soon as any step fails, 0 on success.

# bison -d also writes the token header that the scanner includes.
bison -d filter.y || exit 1
flex filter.l || exit 1

# Report a compile/link failure instead of unconditionally exiting 0.
g++ -o filter *.c *.cpp || exit 1

exit 0


输出结果:

Input: 
width > 640 || height < 450
|| {
    > {
        width
        640
    }
    < {
        height
        450
    }
}
Evaluated:
(bool)true

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值