可配置过滤表达式

多个维度的过滤规则同时使用时容易变得杂乱;本文介绍如何通过“可配置的过滤表达式”来精简复杂过滤条件的组合。

{
    "filter_rules": [
        {
            "id": "R1",
            "op": "in",
            "dim": "event_type",
            "value": [
                "visit",
                "custom"
            ]
        },
        {
            "id": "R2",
            "op": "in",
            "dim": "city_id",
            "value": [
                10, 6
            ]
        },
        {
            "id": "R3",
            "op": "not in",
            "dim": "user_tag$gender",
            "value": [
                "female"
            ]
        }
    ],
    "rule_expr": "!R3 | (R1 & R2)"
}

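先在 filter_rules 中逐条配置过滤规则,再用 rule_expr 表达式通过规则 id 把它们关联起来。表达式支持 !(非)、&(与)、|(或)和括号;其中 ! 的优先级最高,& 与 | 优先级相同、按从左到右结合(例如 R4 | R3 & R5 等价于 (R4 | R3) & R5),需要改变计算顺序时请使用括号。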

配置字段说明(一级节点 / 二级节点 / 数据类型 / 描述 / 示例 / 备注):

filter_rules(一级节点,Array):过滤规则集合。示例:
{
    "filter_rules": [
        {
            "id": "R1",
            "op": "in",
            "dim": "event_type",
            "value": [
                "visit",
                "custom"
            ]
        },
        {
            "id": "R2",
            "op": "in",
            "dim": "city_id",
            "value": [
                10, 6
            ]
        },
        {
            "id": "R3",
            "op": "not in",
            "dim": "user_tag$gender",
            "value": [
                "female"
            ]
        }
    ],
    "rule_expr": "!R3 | (R1 & R2)"
}

id(二级节点,String):单个过滤规则的唯一标识(仅支持字母、数字、下划线和 $)。
示例:"R1" 或 "_filter0"
op(二级节点,String):过滤操作符,目前仅支持 in / not in(大小写兼容)。

in:正向过滤条件

not in:反向过滤条件

示例:"in" 或 "not in"
dim(二级节点,String):可做过滤的维度。
示例:"city" 或 "user_tag"
value(二级节点,Array[Any]):需要过滤的维度值集合。
示例:["A", "B"] 或 [10, 6]

表达式核心逻辑:

package dev

import java.util
import java.util.regex.Pattern
import org.json4s.DefaultFormats
import org.json4s.jackson.Serialization.read
import scala.collection.mutable.ArrayBuffer

/**
 * Core logic for parsing and evaluating configurable filter expressions.
 *
 * A strategy is a set of filter rules (each identified by an id) plus a boolean `rule_expr`
 * that combines those ids with `!`, `&`, `|` and parentheses. The infix expression is converted
 * to postfix form by [[parseExpr]] and evaluated against a record by [[calculate]].
 *
 * Operator precedence: `!` binds tighter than `&` / `|`. `&` and `|` deliberately share the
 * same precedence and associate left to right, so `A | B & C` means `(A | B) & C`; use
 * parentheses to force a different grouping.
 */
object FilterExprCalculator {

  /** Prefix operators that consume one operand. */
  private val EXPR_CALC_SINGLE_VALUE_OP_LIST = List("!")

  /** Infix operators that consume two operands. */
  private val EXPR_CALC_DOUBLE_VALUE_OP_LIST = List("&", "|")

  private val LeftParen = "("

  private val RightParen = ")"

  /**
   * Parses the JSON strategy configuration into an evaluable strategy.
   *
   * @param strategyStr JSON text holding `filter_rules` and `rule_expr`
   * @return `None` when the text is null or blank (nothing will be filtered), otherwise the
   *         rules keyed by id together with the postfix form of `rule_expr`
   */
  def parseStrategyParams(strategyStr: String): Option[StrategyExpr] = {
    implicit val defaultFormats: DefaultFormats.type = DefaultFormats
    // Treat null and whitespace-only input like the empty string instead of failing in the parser.
    if (strategyStr == null || strategyStr.trim.isEmpty) {
      println("Filter strategyStr is empty! Nothing will be filtered!")
      None
    } else {
      val strategyParams = read[StrategyParams](strategyStr)
      val ruleMap = strategyParams.filter_rules.map(rule => rule.id -> rule).toMap
      Some(StrategyExpr(ruleMap, parseExpr(strategyParams.rule_expr)))
    }
  }

  /**
   * Converts an infix rule expression into postfix (suffix) form.
   *
   * @param expr infix expression, e.g. `!(R4 | R3 & R5) | !R6 & !(R1 & !R2)`
   * @return postfix tokens, e.g.
   *         `Array("R4", "R3", "|", "R5", "&", "!", "R6", "!", "|", "R1", "R2", "!", "&", "!", "&")`
   * @throws IllegalArgumentException if the expression contains an unsupported character or
   *                                  unbalanced parentheses
   */
  def parseExpr(expr: String): Array[String] = {
    val suffixExpr = ArrayBuffer.empty[String]
    // Pending operators and open parentheses (LIFO).
    val symbolStack = new util.ArrayDeque[String]()
    // Characters of the rule id currently being read.
    val ruleId = new StringBuilder

    def flushRuleId(): Unit =
      if (ruleId.nonEmpty) {
        suffixExpr += ruleId.toString
        ruleId.clear()
      }

    // Drop every kind of whitespace (not only ' ') so tabs/newlines are never taken for operators.
    expr.filterNot(_.isWhitespace).foreach { ch =>
      if (isExprRuleId(ch)) {
        ruleId.append(ch)
      } else {
        flushRuleId()
        val token = ch.toString
        if (token == LeftParen) {
          symbolStack.push(token)
        } else if (token == RightParen) {
          // Emit operators back to the matching "(" and discard the parenthesis itself.
          var matched = false
          while (!matched) {
            if (symbolStack.isEmpty) {
              throw new IllegalArgumentException(s"Unmatched ')' in rule_expr: $expr")
            }
            val op = symbolStack.pop()
            if (op == LeftParen) matched = true else suffixExpr += op
          }
        } else if (EXPR_CALC_SINGLE_VALUE_OP_LIST.contains(token)) {
          // A prefix operator has no left operand yet, so it must never pop earlier operators;
          // popping here would emit "!" before its operand for inputs such as "!!R1".
          symbolStack.push(token)
        } else if (EXPR_CALC_DOUBLE_VALUE_OP_LIST.contains(token)) {
          // Left-associative: emit pending operators that bind at least as tightly.
          while (!symbolStack.isEmpty && symbolStack.peek() != LeftParen &&
            precedence(symbolStack.peek()) >= precedence(token)) {
            suffixExpr += symbolStack.pop()
          }
          symbolStack.push(token)
        } else {
          throw new IllegalArgumentException(s"Can't recognize character '$ch' in rule_expr: $expr")
        }
      }
    }
    // The expression may end with a rule id.
    flushRuleId()
    // Emit the remaining operators; a leftover "(" means it was never closed.
    while (!symbolStack.isEmpty) {
      val op = symbolStack.pop()
      if (op == LeftParen) {
        throw new IllegalArgumentException(s"Unmatched '(' in rule_expr: $expr")
      }
      suffixExpr += op
    }
    println(s"Parse Filter rule_expr to suffix_expr,\n\trule_expr =>\n\t\t$expr\n\tsuffix_expr =>\n\t\t${suffixExpr.mkString(" ")}")
    suffixExpr.toArray
  }

  /**
   * Evaluates a parsed strategy against one record.
   *
   * @param strategy parsed rules and postfix expression
   * @param valueMap dimension name -> value of the record being tested
   * @return the boolean result of the rule expression
   * @throws IllegalArgumentException if the expression references an unknown rule id, a rule
   *                                  has an unsupported `op`, or an operator lacks operands
   * @throws IllegalStateException    if the expression does not reduce to exactly one value
   */
  def calculate(strategy: StrategyExpr, valueMap: Map[String, AnyVal]): Boolean = {
    // Render the tokens explicitly; interpolating the Array itself prints "[Ljava.lang.String;@...".
    lazy val suffixText = strategy.suffixExpr.mkString(" ")

    def missingOperand(op: String): IllegalArgumentException =
      new IllegalArgumentException(
        s"FilterExpr calc error. Operator '$op' lacks operands, ruleSuffixExpr: $suffixText")

    // Immutable list used as the stack of intermediate results (head = top).
    val resultStack = strategy.suffixExpr.foldLeft(List.empty[Boolean]) { (stack, token) =>
      token match {
        case op if EXPR_CALC_SINGLE_VALUE_OP_LIST.contains(op) =>
          stack match {
            case operand :: rest => applySingleValueOp(op, operand) :: rest
            case Nil => throw missingOperand(op)
          }
        case op if EXPR_CALC_DOUBLE_VALUE_OP_LIST.contains(op) =>
          stack match {
            case right :: left :: rest => applyDoubleValueOp(op, left, right) :: rest
            case _ => throw missingOperand(op)
          }
        case ruleId =>
          evaluateRule(strategy, ruleId, valueMap, suffixText) :: stack
      }
    }

    // Explicit check instead of `assert`, which can be elided by compiler flags.
    resultStack match {
      case result :: Nil => result
      case _ =>
        throw new IllegalStateException(
          s"FilterExpr calc error. calcStack size != 1, ruleSuffixExpr: $suffixText")
    }
  }

  /** Applies a prefix operator to its operand. */
  private def applySingleValueOp(op: String, operand: Boolean): Boolean = op match {
    case "!" => !operand
    case _ => throw new IllegalArgumentException(s"Can't recognize single value op: $op")
  }

  /** Applies an infix operator to its two operands. */
  private def applyDoubleValueOp(op: String, left: Boolean, right: Boolean): Boolean = op match {
    case "|" => left || right
    case "&" => left && right
    case _ => throw new IllegalArgumentException(s"Can't recognize double value op: $op")
  }

  /**
   * Evaluates a single rule against the record.
   * A record that does not carry the rule's dimension passes the rule (returns true).
   */
  private def evaluateRule(
      strategy: StrategyExpr,
      ruleId: String,
      valueMap: Map[String, AnyVal],
      suffixText: => String): Boolean = {
    val filterRule = strategy.ruleMap.getOrElse(ruleId,
      throw new IllegalArgumentException(
        s"Lost rule_id $ruleId in strategy, strategy ruleSuffixExpr: $suffixText"))
    valueMap.get(filterRule.dim) match {
      case None => true
      case Some(actual) =>
        val hit = filterRule.value.contains(actual)
        // Locale.ROOT keeps the case-insensitive match independent of the JVM default locale.
        filterRule.op.toUpperCase(util.Locale.ROOT).trim match {
          case "IN" => hit
          case "NOT IN" => !hit
          case _ =>
            throw new IllegalArgumentException(
              s"Can't recognize config filter_rules.op: ${filterRule.op}")
        }
    }
  }

  /**
   * Tells whether a character may be part of a rule id.
   *
   * @param ch character to test
   * @return true for ASCII letters, ASCII digits, `_` and `$`
   */
  private def isExprRuleId(ch: Char): Boolean =
    (ch >= '0' && ch <= '9') ||
      (ch >= 'a' && ch <= 'z') ||
      (ch >= 'A' && ch <= 'Z') ||
      ch == '_' || ch == '$'

  /**
   * Binding strength of an operator; a larger number binds tighter.
   *
   * @param op operator token
   * @return 2 for prefix operators, 1 for infix operators, 0 for anything else
   */
  private def precedence(op: String): Int =
    if (EXPR_CALC_SINGLE_VALUE_OP_LIST.contains(op)) 2
    else if (EXPR_CALC_DOUBLE_VALUE_OP_LIST.contains(op)) 1
    else 0

}

/**
 * One entry of the `filter_rules` configuration array.
 *
 * @param id    identifier by which `rule_expr` refers to this rule
 * @param op    filter operator; the calculator accepts "in" and "not in" (case-insensitive)
 * @param dim   name of the dimension whose value is tested
 * @param value set of dimension values the record's value is matched against
 */
case class ExpFilterRule(
  id: String,
  op: String,
  dim: String,
  value: Array[AnyVal]
)

/**
 * A parsed filter strategy ready for evaluation.
 *
 * @param ruleMap    filter rules keyed by rule id
 * @param suffixExpr the rule expression as postfix tokens (rule ids and operators)
 */
case class StrategyExpr(
  ruleMap: Map[String, ExpFilterRule],
  suffixExpr: Array[String]
)

/**
 * Raw filter configuration as deserialized from JSON; field names mirror the JSON keys.
 *
 * @param filter_rules the configured filter rules
 * @param rule_expr    infix boolean expression combining rule ids
 */
case class StrategyParams(
  filter_rules: Array[ExpFilterRule],
  rule_expr: String
)

测试类:

package dev

/**
 * Manual smoke test: parses a sample strategy and evaluates it against one sample record,
 * printing the boolean result.
 */
object FilterTest {

  def main(args: Array[String]): Unit = {
    // Sample configuration: three rules combined by `rule_expr`.
    val params =
      """
        |{
        |    "filter_rules": [
        |        {
        |            "id": "R1",
        |            "op": "in",
        |            "dim": "event_type",
        |            "value": [
        |                "visit",
        |                "custom"
        |            ]
        |        },
        |        {
        |            "id": "R2",
        |            "op": "in",
        |            "dim": "city_id",
        |            "value": [
        |                10, 6
        |            ]
        |        },
        |        {
        |            "id": "R3",
        |            "op": "not in",
        |            "dim": "user_tag$gender",
        |            "value": [
        |                "female"
        |            ]
        |        }
        |    ],
        |    "rule_expr": "!R3 | (R1 & R2)"
        |}
        |""".stripMargin

    // The cast is needed because `calculate` takes Map[String, AnyVal] while the sample
    // record mixes String and Int values.
    val valueMap: Map[String, AnyVal] = Map("event_type" -> "custom", "city_id" -> 6, "user_tag$gender" -> "male").asInstanceOf[Map[String, AnyVal]]

    // Handle the "no strategy" case explicitly instead of calling Option.get.
    FilterExprCalculator.parseStrategyParams(params) match {
      case Some(strategyExpr) =>
        val res = FilterExprCalculator.calculate(strategyExpr, valueMap)
        println(res)
      case None =>
        println("No filter strategy parsed; nothing to evaluate.")
    }
  }

}

 运行结果:

 

  • 2
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值