spark_黑名单过滤

使用 Spark Streaming 实时获取广告点击数据,统计对同一广告点击次数超过阈值的用户,并将产生的黑名单写入 MySQL 数据库。

数据如下(每行两个字段;注意:下面的代码使用英文逗号 "," 切分每一行,因此实际发送到 socket 的数据需要以逗号分隔,例如 3333,flume,否则取第二个字段时会数组越界):

3333 flume
4444 ooize
5555 flume
4444 ooize
5555 flume
2222 hive
3333 hadoop
4444 hbase
3333 flume
4444 ooize
5555 flume
flume 1
hadoop 2


import java.io.Serializable;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.StorageLevels;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

import scala.Tuple2;

import org.apache.spark.streaming.Durations;

public class finaltest {

	public static void main(String[] args) throws InterruptedException {
		// TODO Auto-generated method stub
		//1.获取实时数据
		SparkConf sparkConf = new SparkConf().setAppName("Streaming").setMaster("local[2]");
		JavaStreamingContext ssc = new JavaStreamingContext(sparkConf,Durations.seconds(60));
		JavaReceiverInputDStream<String> lines = ssc.socketTextStream(args[0], Integer.parseInt(args[1]),StorageLevels.MEMORY_AND_DISK_SER);
		//2.处理数据,获得每天对某个广告点击超过N次的用户
		JavaPairDStream<String,String> data = lines.mapToPair(f -> new Tuple2<>(f.split(",")[0],f.split(",")[1]));
		data.foreachRDD(rdd -> {
			JavaRDD<Advert> adRDD = rdd.map(f -> {
				Advert ad = new Advert();
				ad.setUserId(f._1);
				ad.setAdvertId(f._2);
				return ad;
			});
		
		SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf());
		Dataset<Row> words = spark.createDataFrame(adRDD,Advert.class);
		words.createOrReplaceGlobalTempView("words");
		Dataset<Row> result = spark.sql("select userId from (select userId,advertId,count(*) from words group by userId,advertId having count(*) > 2 a");
		//3.将实时产生的黑名单存入MYSQL数据库	
		result.write().format("jdbc")
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值