假设我们有一个类具有一些属性。这个类有很多实例。给定一个过滤的规则,挑选符合规则的实例。
假设规则使用类似C语言的语句进行描述。
例子:
属性列表
width int
height int
fps float
os string
一种可能的过滤器:
width > 640 || height < 450
1. 构造词法器
filter.l
%{
/* Definitions section of the scanner: C declarations used by the rules below. */
#include <stdio.h>
#include <stdlib.h>
// Override the scanner's input routine: YY_INPUT is redefined below so that
// characters are fetched through readInputForLexer() (declared here, defined
// in the parser source).
extern int readInputForLexer(char* buffer, size_t *numBytesRead, int maxBytesToRead);
#undef YY_INPUT
#define YY_INPUT(b,r,s) readInputForLexer(b, &r, s)
// Token codes and the yylval type referenced by the rule actions
// (the build script produces this header with `bison -d filter.y`).
#include "filter.tab.h"
void yyerror(char *); // must be declared here because the rules section calls it
// Inside a rule action, yytext is the text of the current match (a string).
// An action may store a value in yylval (of type YYSTYPE; optional) and
// returns the token type to the parser.
%}
%%
    /* Rules section. flex prefers the longest match and, on a tie, the rule
       listed first - that is why ">=" is recognised even though the ">"
       rule precedes it.
       NOTE(review): '/' is also accepted by the ID rule, which is listed
       before the arithmetic-operator rule - confirm that this is intended. */
[\t\n ] ;
[0-9]+ { yylval.iVal = atoi(yytext); return INTEGER; }
[0-9]*\.[0-9]+ { yylval.fVal = atof(yytext); return FLOAT; }
[a-zA-Z/][a-zA-Z/0-9]* { yylval.sVal = strdup(yytext); return ID; }
[\+\-\*\/\%] { return *yytext; }
== { return EQ; }
\> { return GT; }
\< { return LT; }
\>= { return GE; }
\<= { return LE; }
!= { return NEQ; }
\|\| { return OR; }
\&\& { return AND; }
\( { return '('; }
\) { return ')'; }
\"(\\.|[^\\"])*\" {
    /* The match always starts and ends with a double quote. Strip both so
       that the token value is the bare text (e.g. Android, not "Android")
       and can compare equal to a string attribute. Escape sequences are
       kept exactly as written. The parser releases the copy with free(). */
    yytext[yyleng - 1] = '\0';
    yylval.sVal = strdup(yytext + 1);
    return STRING;
}
. { yyerror("Bad character"); }
%%
// Background reading: https://www.ibm.com/developerworks/cn/linux/sdk/lex/
// yywrap() exists to support input spread over multiple source files.
// This scanner has a single input, so it always answers 1, which stops
// parsing at EOF.
int yywrap(void)
{
    const int kStopAtEof = 1;
    return kStopAtEof;
}
2. 创建yacc文件
filter.y
%{
/* Created by Zhenyong Chen, 2017/07/30
*
* This demo is going to create a tree structured filter.
* A has a list of properties
* There's a set of rules
* The filter will check whether A is qualified
*
* The grammar below builds a tree of Operand nodes (see syntax.h) and hands
* its root to parseProgram() through _outputTree.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <memory.h>
#include "syntax.h"
#undef YYLEX_PARAM
// Scanner entry point, generated from the lex file.
extern int yylex();
// Error reporter, defined in the epilogue of this file.
void yyerror(char *);
// Root of the most recently parsed tree; set by the `program` rule and
// collected (then reset to NULL) by parseProgram().
static Operand *_outputTree = NULL;
%}
// Token declaration syntax:
// http://www.gnu.org/software/bison/manual/html_node/Token-Decl.html
// Override the default YYSTYPE (YYSTYPE is the type of yylval in the lex file)
%union {
// value carried by a FLOAT token
float fVal;
// value carried by an INTEGER token
int iVal;
// text carried by an ID or STRING token
char *sVal;
// syntax-tree node produced by a grammar rule
struct Operand *synNode;
}
// Literal and identifier tokens; <...> names the %union member holding the value.
%token <iVal> INTEGER "int"
%token <fVal> FLOAT "float"
%token <sVal> ID
// Operator tokens; the quoted string is an alias usable inside the grammar rules.
%token EQ "=="
%token NEQ "!="
%token GT ">"
%token LT "<"
%token GE ">="
%token LE "<="
%token OR "||"
%token AND "&&"
%token <sVal> STRING
// Precedence of the arithmetic operators. No rule in the grammar below uses them.
%left '+' '-'
%left '*' '/' '%'
// Tell the parser the value type of each nonterminal
%type<synNode> program
%type<synNode> expr
%type<synNode> AndExpr
%type<synNode> OrExpr
%%
// $$, $1, ... are of type YYSTYPE (http://pubs.opengroup.org/onlinepubs/7908799/xcu/yacc.html)
//
// Grammar overview:
//   program : OrExpr | <empty>
//   OrExpr  : AndExpr joined by "||"
//   AndExpr : expr joined by "&&"
//   expr    : ID <comparison> literal | '(' OrExpr ')'
// "&&" binds tighter than "||" because AndExpr is nested inside OrExpr.
//
// Start symbol: publishes the finished tree through _outputTree.
program:
OrExpr { $$ = $1; _outputTree = $$;}
| { $$ = NULL; _outputTree = $$; } // the program may be empty
;
// One comparison between a variable and a literal, or a parenthesised filter.
// ID and STRING values were allocated by the scanner with strdup(); the
// Create*Node helpers store their own copy, so each action free()s them.
// INTEGER and FLOAT values are passed by value and need no clean-up.
expr:
ID "==" STRING { $$ = Operand::CreateNonTermNode(Operand::OP_EQ,
Operand::CreateVarNode($1),
Operand::CreateValueNode($3)); free($1); free($3); }
| ID "!=" STRING { $$ = Operand::CreateNonTermNode(Operand::OP_NE,
Operand::CreateVarNode($1),
Operand::CreateValueNode($3)); free($1); free($3); }
| ID "==" INTEGER { $$ = Operand::CreateNonTermNode(Operand::OP_EQ,
Operand::CreateVarNode($1),
Operand::CreateValueNode($3)); free($1); }
| ID "!=" INTEGER { $$ = Operand::CreateNonTermNode(Operand::OP_NE,
Operand::CreateVarNode($1),
Operand::CreateValueNode($3)); free($1); }
| ID "==" FLOAT { $$ = Operand::CreateNonTermNode(Operand::OP_EQ,
Operand::CreateVarNode($1),
Operand::CreateValueNode($3)); free($1); }
| ID "!=" FLOAT { $$ = Operand::CreateNonTermNode(Operand::OP_NE,
Operand::CreateVarNode($1),
Operand::CreateValueNode($3)); free($1); }
| ID ">" INTEGER { $$ = Operand::CreateNonTermNode(Operand::OP_GT,
Operand::CreateVarNode($1),
Operand::CreateValueNode($3)); free($1); }
| ID ">" FLOAT { $$ = Operand::CreateNonTermNode(Operand::OP_GT,
Operand::CreateVarNode($1),
Operand::CreateValueNode($3)); free($1); }
| ID "<" INTEGER { $$ = Operand::CreateNonTermNode(Operand::OP_LT,
Operand::CreateVarNode($1),
Operand::CreateValueNode($3)); free($1); }
| ID "<" FLOAT { $$ = Operand::CreateNonTermNode(Operand::OP_LT,
Operand::CreateVarNode($1),
Operand::CreateValueNode($3)); free($1); }
| ID ">=" INTEGER { $$ = Operand::CreateNonTermNode(Operand::OP_GE,
Operand::CreateVarNode($1),
Operand::CreateValueNode($3)); free($1); }
| ID ">=" FLOAT { $$ = Operand::CreateNonTermNode(Operand::OP_GE,
Operand::CreateVarNode($1),
Operand::CreateValueNode($3)); free($1); }
| ID "<=" INTEGER { $$ = Operand::CreateNonTermNode(Operand::OP_LE,
Operand::CreateVarNode($1),
Operand::CreateValueNode($3)); free($1); }
| ID "<=" FLOAT { $$ = Operand::CreateNonTermNode(Operand::OP_LE,
Operand::CreateVarNode($1),
Operand::CreateValueNode($3)); free($1); }
| '(' OrExpr ')' { $$ = $2; }
;
// Left-recursive chain of "&&"; each step wraps the chain so far and the new
// expr in an OP_AND node.
AndExpr:
expr { $$ = $1; }
| AndExpr AND expr { $$ = Operand::CreateNonTermNode(Operand::OP_AND, $1, $3); }
;
// Left-recursive chain of "||"; each step wraps the chain so far and the new
// AndExpr in an OP_OR node.
OrExpr:
AndExpr { $$ = $1; }
| OrExpr "||" AndExpr { $$ = Operand::CreateNonTermNode(Operand::OP_OR, $1, $3); }
;
%%
// Error hook shared by the scanner and the parser: writes the message to
// standard output behind a "Compile error: " prefix.
void yyerror(char *s)
{
    fprintf(stdout, "Compile error: %s\n", s);
}
// Text currently being parsed (owned by parseProgram) and the offset of the
// next byte that has not been handed to the scanner yet.
static char *_buf = NULL;
static int _readOffset = 0;

/**
 * Input callback for the scanner (wired in through YY_INPUT in the lex file).
 * Copies the next chunk of the pending text into the scanner's buffer.
 *
 * @param buffer          destination owned by the scanner
 * @param numBytesRead    receives the number of bytes copied; 0 at end of input
 * @param maxBytesToRead  capacity of buffer; a negative value is treated as 0
 * @return 0 when a chunk was delivered, -1 when there is nothing left to read
 *         (including the case where no text has been installed in _buf).
 */
int readInputForLexer(char* buffer, size_t *numBytesRead, int maxBytesToRead)
{
    *numBytesRead = 0;

    // No text installed: report end of input instead of dereferencing NULL.
    if(_buf == NULL)
        return -1;

    // Only the unread tail is measured; _readOffset never passes the terminator
    // because it only ever advances by at most the remaining length.
    const int bytesRemaining = static_cast<int>(strlen(_buf + _readOffset));
    if(bytesRemaining == 0)
        return -1;

    int numBytesToRead = (maxBytesToRead < bytesRemaining) ? maxBytesToRead : bytesRemaining;
    if(numBytesToRead < 0)
        numBytesToRead = 0; // never hand a negative size to memcpy

    memcpy(buffer, _buf + _readOffset, static_cast<size_t>(numBytesToRead));
    *numBytesRead = static_cast<size_t>(numBytesToRead);
    _readOffset += numBytesToRead;
    return 0;
}
/**
 * Parses a filter expression and returns the root of its syntax tree.
 *
 * @param text  NUL-terminated filter source; NULL yields NULL
 * @return the root node (the caller owns it and must delete it), or NULL when
 *         text is NULL, the program is empty, memory runs out, or the parser
 *         reports an error.
 *
 * NOTE(review): the scanner is not restarted here - confirm that calling this
 * function more than once per process behaves as expected.
 */
Operand *parseProgram(const char *text)
{
    if(text == NULL)
        return NULL;

    // Private copy of the source for readInputForLexer() to stream from.
    const size_t length = strlen(text);
    _buf = (char *)malloc(length + 1);
    if(_buf == NULL)
        return NULL;
    memcpy(_buf, text, length + 1);

    _readOffset = 0;
    _outputTree = NULL;
    const int status = yyparse();

    free(_buf);
    _buf = NULL;

    Operand *ret = _outputTree;
    _outputTree = NULL;

    // yyparse() returns non-zero on failure. The `program` rule may already
    // have published a tree before the error was detected, so discard it
    // rather than hand back the result of a failed parse.
    if(status != 0) {
        delete ret;
        return NULL;
    }
    return ret;
}
3. 语法树
syntax.h
#if !defined(__SYNTAX_H__)
#define __SYNTAX_H__
#include <stddef.h> // NULL, used by the default arguments below
#include <string>
/**
 * Node of the filter syntax tree.
 *
 * A node is either a non-terminal (an operator with up to two child
 * operands) or a leaf (an int/float/string literal, or a variable name).
 * eType tells which group of members is meaningful. Nodes are created only
 * through the static Create* factories and are released with delete; a
 * non-terminal node deletes its children in its destructor.
 */
class Operand {
public:
    /** Operator stored in a non-terminal node. */
    enum OperatorType {
        OP_AND = 1,
        OP_OR,
        OP_EQ,
        OP_NE,
        OP_GT,
        OP_LT,
        OP_GE,
        OP_LE,
    };
    /** Kind of node. */
    enum OperandType {
        OP_NONTERM = 0,
        OP_INT,
        OP_FLOAT,
        OP_STRING,
        OP_VAR,
    };
    enum OperandType eType;
    // if OP_NONTERM
    enum OperatorType eOperator;
    Operand *aOperands[2];
    // if OP_VAR
    std::string varName;
    // otherwise, values
    int iVal;
    float fVal;
    std::string sVal;
    // create a leaf node: int/float/string
    static Operand *CreateValueNode(int ival);
    static Operand *CreateValueNode(float fval);
    static Operand *CreateValueNode(const char *sval);
    // create a leaf node that refers to a variable by name
    static Operand *CreateVarNode(const char *name);
    // create a non-terminal node
    static Operand *CreateNonTermNode(enum OperatorType opType, Operand *operand1 = NULL, Operand *operand2 = NULL);
    virtual ~Operand();
    /** Prints this subtree to standard output, indented by margin columns. */
    void Dump(int margin);
private:
    // Construction goes through the Create* factories only.
    Operand();
    // Not copyable: a copy would share the child pointers and both copies
    // would delete them. Declared but intentionally never defined.
    Operand(const Operand &);
    Operand &operator=(const Operand &);
};
#endif /* __SYNTAX_H__ */
syntax.cpp
#include "syntax.h"
#include <stddef.h>
#include <stdio.h>
// Builds an int leaf holding 0. Every member gets a defined value so that no
// code path (destructor, Dump, evaluation) can read an indeterminate one; the
// factories then overwrite the members relevant to the node they create.
Operand::Operand()
    : eType(OP_INT), eOperator(OP_AND), iVal(0), fVal(0.0f)
{
    aOperands[0] = NULL;
    aOperands[1] = NULL;
}
// Releases the children of a non-terminal node; leaf nodes have nothing to free.
Operand::~Operand()
{
    if(eType != OP_NONTERM)
        return;

    const int childCount = static_cast<int>(sizeof(aOperands) / sizeof(aOperands[0]));
    for(int idx = 0; idx < childCount; ++idx) {
        delete aOperands[idx]; // deleting a NULL pointer is a no-op
        aOperands[idx] = NULL;
    }
}
// Creates a leaf node holding an integer literal. The caller owns the result.
Operand *Operand::CreateValueNode(int ival)
{
    Operand *leaf = new Operand();
    leaf->iVal = ival;
    leaf->eType = OP_INT;
    return leaf;
}
// Creates a leaf node holding a floating-point literal. The caller owns the result.
Operand *Operand::CreateValueNode(float fval)
{
    Operand *leaf = new Operand();
    leaf->fVal = fval;
    leaf->eType = OP_FLOAT;
    return leaf;
}
// Creates a leaf node holding a string literal. The text is copied, so the
// caller keeps ownership of sval. A NULL sval produces an empty string
// (assigning NULL to std::string would be undefined behaviour).
// The caller owns the returned node.
Operand *Operand::CreateValueNode(const char *sval)
{
    Operand *op = new Operand();
    op->eType = OP_STRING;
    if(sval != NULL)
        op->sVal = sval;
    return op;
}
// Creates a leaf node that refers to a variable by name. The name is copied,
// so the caller keeps ownership of it. A NULL name produces an empty name
// (assigning NULL to std::string would be undefined behaviour).
// The caller owns the returned node.
Operand *Operand::CreateVarNode(const char *name)
{
    Operand *op = new Operand();
    op->eType = OP_VAR;
    if(name != NULL)
        op->varName = name;
    return op;
}
// Creates an operator node over the given operands (either may be NULL).
// The new node takes ownership of both operands and deletes them in its
// destructor; the caller owns the returned node.
Operand *Operand::CreateNonTermNode(enum OperatorType opType,
                                    Operand *operand1/* = NULL*/,
                                    Operand *operand2/* = NULL*/)
{
    Operand *node = new Operand();
    node->aOperands[0] = operand1;
    node->aOperands[1] = operand2;
    node->eOperator = opType;
    node->eType = OP_NONTERM;
    return node;
}
// Writes `margin` blanks to standard output; nothing for margin <= 0.
static void print_margin(int margin)
{
    for(int remaining = margin; remaining > 0; --remaining)
        printf(" ");
}
// Returns the textual spelling of an operator, or "ERROR" for a value that
// is not one of the known operators.
const char *operatorTypeToString(enum Operand::OperatorType t)
{
    // Each operator paired with its spelling.
    static const struct {
        enum Operand::OperatorType type;
        const char *text;
    } kSpellings[] = {
        { Operand::OP_AND, "&&" },
        { Operand::OP_OR,  "||" },
        { Operand::OP_EQ,  "==" },
        { Operand::OP_NE,  "!=" },
        { Operand::OP_GT,  ">"  },
        { Operand::OP_LT,  "<"  },
        { Operand::OP_GE,  ">=" },
        { Operand::OP_LE,  "<=" },
    };

    const int count = static_cast<int>(sizeof(kSpellings) / sizeof(kSpellings[0]));
    for(int i = 0; i < count; ++i) {
        if(kSpellings[i].type == t)
            return kSpellings[i].text;
    }
    return "ERROR";
}
void Operand::Dump(int margin)
{
if(eType == OP_NONTERM) {
print_margin(margin);
// print operator
printf("%s {\n", operatorTypeToString(eOperator));
for(int i=0; i<sizeof(aOperands)/sizeof(aOperands[0]); i++) {
if(aOperands[i] != NULL) {
aOperands[i]->Dump(margin + 4);
}
}
print_margin(margin);
printf("}\n");
}
else
{
print_margin(margin);
switch(eType) {
case OP_INT:
printf("%d\n", iVal);
break;
case OP_FLOAT:
printf("%f\n", fVal);
break;
case OP_STRING:
printf("%s\n", sVal.c_str());
break;
case OP_VAR:
printf("%s\n", varName.c_str());
break;
default:
break;
}
}
}
4. 使用方法
main.cpp
#include <stdio.h>
#include "syntax.h"
#include <string>
#include <list>
extern Operand *parseProgram(const char *text);
// Result of evaluating a syntax tree node (also used to hold a variable's value).
// `type` selects which of the value members is meaningful.
struct compoundValue {
    int type; // 0: void; 1: int; 2: float; 3: std::string; 4: bool
    int iVal;
    float fVal;
    std::string sVal;
    bool bVal;

    // Starts as a void value with every member zeroed, so a value that is
    // only partially filled in never exposes indeterminate data.
    compoundValue() : type(0), iVal(0), fVal(0.0f), sVal(), bVal(false) {}
};
// Prints a value to standard output, prefixed with its type in parentheses.
// A value whose type is not int/float/string/bool prints nothing.
void printValue(struct compoundValue &v)
{
    switch(v.type) {
    case 1:
        printf("(int)%d\n", v.iVal);
        break;
    case 2:
        printf("(float)%f\n", v.fVal);
        break;
    case 3:
        printf("(string)%s\n", v.sVal.c_str());
        break;
    case 4:
        printf("(bool)%s\n", v.bVal ? "true" : "false");
        break;
    default:
        // should not reach here
        break;
    }
}
// A named attribute that a filter expression can refer to.
struct variable {
// name under which the filter refers to the attribute
std::string name;
// current value; value.type tells which member of it is meaningful
struct compoundValue value;
};
// All registered variables. Filled by initVarList() and searched by the
// get_variable_* lookups.
std::list<struct variable> gVariableList;
void initVarList()
{
struct variable v;
v.name = "width";
v.value.type = 1; // int
v.value.iVal = 640;
gVariableList.push_back(v);
v.name = "height";
v.value.type = 1; // int
v.value.iVal = 360;
gVariableList.push_back(v);
v.name = "fps";
v.value.type = 2; // float
v.value.fVal = 25.0;
gVariableList.push_back(v);
v.name = "os";
v.value.type = 3; // std::string
v.value.sVal = "Android";
gVariableList.push_back(v);
v.name = "enabled";
v.value.type = 4; // bool
v.value.bVal = true;
gVariableList.push_back(v);
}
int get_variable_type(std::string varName)
{
std::list<struct variable>::iterator it;
for(it=gVariableList.begin(); it!=gVariableList.end(); it++) {
struct variable &v = *it;
if(v.name.compare(varName) == 0) {
return v.value.type;
}
}
return -1; // should not reach here
}
int get_variable_value_i(std::string varName)
{
std::list<struct variable>::iterator it;
for(it=gVariableList.begin(); it!=gVariableList.end(); it++) {
struct variable &v = *it;
if(v.name.compare(varName) == 0) {
return v.value.iVal;
}
}
return 0; // should not reach here
}
float get_variable_value_f(std::string varName)
{
std::list<struct variable>::iterator it;
for(it=gVariableList.begin(); it!=gVariableList.end(); it++) {
struct variable &v = *it;
if(v.name.compare(varName) == 0) {
return v.value.fVal;
}
}
return 0.0f; // should not reach here
}
std::string get_variable_value_s(std::string varName)
{
std::list<struct variable>::iterator it;
for(it=gVariableList.begin(); it!=gVariableList.end(); it++) {
struct variable &v = *it;
if(v.name.compare(varName) == 0) {
return v.value.sVal;
}
}
return ""; // should not reach here
}
bool get_variable_value_b(std::string varName)
{
std::list<struct variable>::iterator it;
for(it=gVariableList.begin(); it!=gVariableList.end(); it++) {
struct variable &v = *it;
if(v.name.compare(varName) == 0) {
return v.value.bVal;
}
}
return false; // should not reach here
}
struct compoundValue eval_syntax(Operand &op)
{
struct compoundValue v;
// traverse the tree
if(op.eType == Operand::OP_INT) {
v.type = 1;
v.iVal = op.iVal;
}
else if(op.eType == Operand::OP_FLOAT) {
v.type = 2;
v.fVal = op.fVal;
}
else if(op.eType == Operand::OP_STRING) {
v.type = 3;
v.sVal = op.sVal;
}
else if(op.eType == Operand::OP_VAR) {
v.type = get_variable_type(op.varName);
if(v.type == 1) {
v.iVal = get_variable_value_i(op.varName);
}
else if(v.type == 2) {
v.fVal = get_variable_value_f(op.varName);
}
else if(v.type == 3) {
v.sVal = get_variable_value_s(op.varName);
}
else if(v.type == 4) {
v.bVal = get_variable_value_b(op.varName);
}
}
else if(op.eType == Operand::OP_NONTERM) {
if(op.eOperator == Operand::OP_AND) {
struct compoundValue left, right;
left = eval_syntax(*op.aOperands[0]);
right = eval_syntax(*op.aOperands[1]);
v.type = 4;
v.bVal = left.bVal && right.bVal;
}
else if(op.eOperator == Operand::OP_OR) {
struct compoundValue left, right;
left = eval_syntax(*op.aOperands[0]);
v.type = 4;
if(left.bVal) {
v.bVal = true;
}
else {
right = eval_syntax(*op.aOperands[1]);
v.bVal = right.bVal;
}
}
else if(op.eOperator == Operand::OP_EQ) {
struct compoundValue left, right;
left = eval_syntax(*op.aOperands[0]);
right = eval_syntax(*op.aOperands[1]);
v.type = 4;
if(left.type == 1)
v.bVal = (left.iVal == right.iVal);
else if(left.type == 2)
v.bVal = (left.fVal == right.fVal);
else if(left.type == 3)
v.bVal = left.sVal.compare(right.sVal) == 0;
else if(left.type == 4)
v.bVal = (left.bVal == right.bVal);
}
else if(op.eOperator == Operand::OP_NE) {
struct compoundValue left, right;
left = eval_syntax(*op.aOperands[0]);
right = eval_syntax(*op.aOperands[1]);
v.type = 4;
if(left.type == 1)
v.bVal = (left.iVal != right.iVal);
else if(left.type == 2)
v.bVal = (left.fVal != right.fVal);
else if(left.type == 3)
v.bVal = left.sVal.compare(right.sVal) != 0;
else if(left.type == 4)
v.bVal = (left.bVal != right.bVal);
}
else if(op.eOperator == Operand::OP_GT) {
struct compoundValue left, right;
left = eval_syntax(*op.aOperands[0]);
right = eval_syntax(*op.aOperands[1]);
v.type = 4;
if(left.type == 1)
v.bVal = (left.iVal > right.iVal);
else if(left.type == 2)
v.bVal = (left.fVal > right.fVal);
}
else if(op.eOperator == Operand::OP_LT) {
struct compoundValue left, right;
left = eval_syntax(*op.aOperands[0]);
right = eval_syntax(*op.aOperands[1]);
v.type = 4;
if(left.type == 1)
v.bVal = (left.iVal < right.iVal);
else if(left.type == 2)
v.bVal = (left.fVal < right.fVal);
}
else if(op.eOperator == Operand::OP_GE) {
struct compoundValue left, right;
left = eval_syntax(*op.aOperands[0]);
right = eval_syntax(*op.aOperands[1]);
v.type = 4;
if(left.type == 1)
v.bVal = (left.iVal >= right.iVal);
else if(left.type == 2)
v.bVal = (left.fVal >= right.fVal);
}
else if(op.eOperator == Operand::OP_LE) {
struct compoundValue left, right;
left = eval_syntax(*op.aOperands[0]);
right = eval_syntax(*op.aOperands[1]);
v.type = 4;
if(left.type == 1)
v.bVal = (left.iVal <= right.iVal);
else if(left.type == 2)
v.bVal = (left.fVal <= right.fVal);
}
}
return v;
}
int main(int argc, const char **agrv)
{
initVarList();
const char *s = "width > 640 || height < 450";
printf("Input: \n%s\n", s);
Operand *p = parseProgram(s);
if(p != NULL) {
p->Dump(0);
struct compoundValue v = eval_syntax(*p);
delete p;
printf("Evaluated:\n");
printValue(v);
}
return 0;
}
5. 编译
#!/bin/bash
# Build the demo: generate the parser and the scanner, then compile everything.
# Each step aborts the script with status 1 on failure, so a broken build is
# never reported as a success.

# -d also writes the header (filter.tab.h) that filter.l includes.
bison -d filter.y || exit 1
flex filter.l || exit 1

# The generated .c files are built together with the .cpp sources by g++.
g++ -o filter *.c *.cpp || exit 1
exit 0
输出结果:
Input:
width > 640 || height < 450
|| {
> {
width
640
}
< {
height
450
}
}
Evaluated:
(bool)true