1.目的
在线过滤掉黑名单的点击,防止刷点击刷评分刷票数等行为
2.素材
1)mysql建立blacklist表
mysql> select * from blacklist;
+--------+--------+
| name   | status |
+--------+--------+
| hadoop | true   |
| spark  | true   |
+--------+--------+
2) socket输入模拟点击log
启动linux上的netcat程序
nc -lk 9999
输入字符
20170901141258 tom
20170901141301 hadoop
20170901141306 jesse
3.代码
/**
* Created by puwenchao on 2017-09-06.
*/
package Streaming
import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.sql.SQLContext
/**
 * One ad-click log record received from the socket stream.
 *
 * @param time first whitespace-separated field of a log line, e.g. `20170901141258`
 * @param name second field of a log line; the value compared against the blacklist `name` column
 */
case class AD(
  time: String,
  name: String
)
/**
 * Spark Streaming job that drops ad clicks made by blacklisted names.
 *
 * Click lines of the form `<time> <name>` are read from a socket in 5-second
 * batches, anti-joined (left join + null check) against the `blacklist` table
 * loaded from MySQL over JDBC, and the surviving clicks are printed.
 */
object streaming_blacklist_sql {

  /**
   * Parses one raw log line into an [[AD]].
   *
   * Blank lines and lines with fewer than two whitespace-separated fields
   * yield `None` instead of throwing `ArrayIndexOutOfBoundsException`, so a
   * single malformed line cannot fail the whole batch. Fields beyond the
   * second are ignored.
   *
   * @param line raw text line received from the socket
   * @return `Some(AD(time, name))` when at least two fields are present, otherwise `None`
   */
  private def parseClick(line: String): Option[AD] =
    line.trim.split("\\s+") match {
      case Array(time, name, _*) => Some(AD(time, name))
      case _                     => None
    }

  /**
   * Entry point: builds the contexts, loads the blacklist, wires up the
   * per-batch filter query and blocks until the streaming context terminates.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Reduce log noise from Spark and the embedded Jetty server.
    Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    // Create the contexts; the streaming batch interval is 5 seconds.
    val conf = new SparkConf().setAppName("streaming_blacklist_sql").setMaster("local[4]")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    val ssc = new StreamingContext(sc, Seconds(5))
    import sqlContext.implicits._

    // Load the blacklist table from MySQL through the JDBC data source.
    val blacklistDF = sqlContext.read.format("jdbc").options(Map(
      "url" -> "jdbc:mysql://192.168.252.141:3306/test",
      "driver" -> "com.mysql.jdbc.Driver",
      "dbtable" -> "blacklist",
      "user" -> "root",
      "password" -> "mysql"
    )).load()
    // The temp-table name is spelled "blicklist" here and in the query below;
    // the two must stay in sync.
    blacklistDF.registerTempTable("blicklist")

    // Receive click lines from the socket and convert the well-formed ones to AD.
    val adsClick = ssc.socketTextStream("192.168.252.141", 9999, StorageLevel.MEMORY_ONLY)
    val adsClickPar = adsClick.flatMap(line => parseClick(line).toList)

    // Keep only clicks that have no matching blacklist row.
    // NOTE(review): any matching row blocks the click, regardless of the value
    // stored in `status` (only a NULL status lets it through) - confirm intended.
    val sql_str = "select a.time,a.name " +
      "from adclick a left join blicklist b " +
      "on a.name=b.name " +
      "where b.status is null"

    // For every batch: expose the RDD as a temp table, run the query, print the result.
    adsClickPar.foreachRDD { ads =>
      ads.toDF().registerTempTable("adclick")
      val normalClick = sqlContext.sql(sql_str)
      normalClick.show()
    }

    // In production the valid clicks would typically be written to Kafka, from
    // which a downstream billing system pulls them.
    ssc.start()
    ssc.awaitTermination()
  }
}
4.输出
+--------------+-----+
|          time| name|
+--------------+-----+
|20170901141258|  tom|
|20170901141306|jesse|
+--------------+-----+