Kafka + Spark Streaming: A Hands-On Introduction (Java Edition)

MyKafka.class (the producer)

package com.fromjoker;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

public class MyKafka {
    public static Map<String, String> kafkaConsumerParams = new HashMap<>();
    public static Properties kafkaProducerParams = new Properties();
    public static String SERVER = "kafka-0.kafka.default.svc.cluster.local:9092,kafka-1.kafka.default.svc.cluster.local:9092,kafka-2.kafka.default.svc.cluster.local:9092";

    public static Map<String, String> consumerDefaultConf(){
        kafkaConsumerParams.put("metadata.broker.list",SERVER);
        kafkaConsumerParams.put("group.id", "apsnew2");
        kafkaConsumerParams.put("fetch.message.max.bytes", "52428800");
        return kafkaConsumerParams;
    }
    public static void producerDefaultConf(){
        kafkaProducerParams.put("bootstrap.servers",SERVER);
        kafkaProducerParams.put("acks", "all");
        kafkaProducerParams.put("retries", 0);
        kafkaProducerParams.put("batch.size", 16384);
        kafkaProducerParams.put("linger.ms", 1);
        kafkaProducerParams.put("buffer.memory", 33554432);
        kafkaProducerParams.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        kafkaProducerParams.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    }
    public static void producer(String topic,String key,String value){
        producerDefaultConf();
        Producer<String, String> producer = new KafkaProducer<>(kafkaProducerParams);
        try {
            // Send one record; key and value come from the method parameters
            producer.send(new ProducerRecord<String, String>(topic, key, value));
        }catch(Exception e){
            e.printStackTrace();
        }finally {
            producer.close();
        }
    }
}
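
A quick way to smoke-test the producer is a tiny driver like the sketch below. ProducerDemo is a made-up class name; the topic "test" matches the one the consumer subscribes to, and the brokers come from MyKafka.SERVER.

ProducerDemo.class

package com.fromjoker;

public class ProducerDemo {
    public static void main(String[] args) {
        // Send one record (topic, key, value) using the producer defined above
        MyKafka.producer("test", "who am i", "i am joker");
    }
}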

MyConsumer.class (the consumer)

package com.fromjoker;

import kafka.serializer.StringDecoder;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;
import scala.Tuple2;

import java.util.HashSet;
import java.util.Set;

// Standard working-hours dashboard
public class MyConsumer {
    private static Logger logger = Logger.getLogger(MyConsumer.class);
    public static SparkSession spark = null;

    public static void main(String[] args)throws Exception{
        Logger.getLogger("org").setLevel(Level.ERROR);
        spark = new MySpark().init();
        JavaStreamingContext streamingContext = new JavaStreamingContext(JavaSparkContext.fromSparkContext(spark.sparkContext()), new Duration(5000));
        Set<String> topicsSet = new HashSet<>();
        topicsSet.add("test");
        JavaPairInputDStream<String, String> message =
                KafkaUtils.createDirectStream(
                        streamingContext,
                        String.class,
                        String.class,
                        StringDecoder.class,
                        StringDecoder.class,
                        MyKafka.consumerDefaultConf(),
                        topicsSet
                );
        JavaDStream<String> valueDStream = message.map(new Function<Tuple2<String, String>, String>() {
            @Override
            public String call(Tuple2<String, String> v1) throws Exception {
                // Concatenate the Kafka message key and value
                return v1._1() + "_" + v1._2();
            }
        });
        valueDStream.foreachRDD(new VoidFunction<JavaRDD<String>>() {
            @Override
            public void call(JavaRDD<String> json) throws Exception {
                if(!json.isEmpty()) {
                    json.foreach(new VoidFunction<String>() {
                        @Override
                        public void call(String s) throws Exception {
                            System.out.println(s);
                        }
                    });
                }
            }
        });

        streamingContext.start();
        streamingContext.awaitTermination();
    }
}
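
On Java 8 the map/foreachRDD pair above can be written more compactly with lambdas. This is only an equivalent sketch of the same processing, assuming the same message stream returned by KafkaUtils.createDirectStream:

        // Lambda form of the transformation above: concatenate key and value, then print each record
        JavaDStream<String> values = message.map(t -> t._1() + "_" + t._2());
        values.foreachRDD(rdd -> {
            if (!rdd.isEmpty()) {
                rdd.foreach(s -> System.out.println(s));
            }
        });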

MySpark.class

package com.fromjoker;


import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;

public class MySpark {
    public SparkSession spark = null;
    public SparkConf    conf  = null;

    public SparkSession init(){
        SparkSession.clearDefaultSession();
        conf = new SparkConf();
        defaultconf();
        spark = SparkSession.builder().config(conf).enableHiveSupport().getOrCreate();
        return spark;
    }
    public SparkSession getSpark() {
        return spark;
    }
    public void setSpark(SparkSession spark) {
        this.spark = spark;
    }
    public SparkConf getConf() {
        return conf;
    }
    public void setConf(SparkConf conf) {
        this.conf = conf;
    }

    // Default Spark parameters shared across jobs
    public SparkConf defaultconf(){
        //conf.setMaster("yarn");
        conf.setMaster("local[4]");
        conf.setAppName("aps");
        conf.set("spark.port.maxRetries", "1280");
        conf.set("spark.sql.warehouse.dir",
                "hdfs://hadoop-master-0.hadoop-master.default.svc.cluster.local:8020/user/hive/warehouse");
        // Ignore broken (missing) partition data
        conf.set("spark.sql.hive.verifyPartitionPath", "true");
        conf.set("spark.io.compression.codec", "snappy");
        conf.set("spark.testing.memory", "2147480000");//后面的值大于512m即可
        // conf.set("spark.shuffle.service.enabled", "true");
        conf.set("spark.dynamicAllocation.enabled","false");
        conf.set("spark.streaming.stopGracefullyOnShutdown","true");//优雅的关闭
        conf.set("spark.rpc.askTimeout", "600s");
        conf.set("spark.sql.autoBroadcastJoinThreshold", "-1");
        // Number of cores for running tasks
        conf.set("spark.executor.cores", "4");
        conf.set("spark.cores.max", "21");
        // Executor memory
        conf.set("spark.executor.memory", "6g");
        conf.set("spark.driver.memory", "15g");
        conf.set("spark.sql.shuffle.partitions", "400");
        // Reusable network connections between machines; mainly reduces connection-setup overhead in large clusters. If the cluster has few machines, consider increasing this value.
        conf.set("spark.shuffle.io.numConnectionsPerPeer", "10");
        // Disable automatic type inference for partition columns
        conf.set("spark.sql.sources.partitionColumnTypeInference.enabled", "false");
        // Dynamic partitioning
        conf.set("hive.exec.dynamic.partition","true");
        conf.set("hive.exec.dynamic.partition.mode", "nonstrict");
        conf.set("dfs.client.socket-timeout","300000");
        // Access datanodes by hostname (when the master registers datanodes by internal IP, the client would otherwise get an unreachable internal address)
        conf.set("dfs.client.use.datanode.hostname", "true");
        conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        return conf;
    }

    // Manually set a Spark parameter
    public void setsparkconf(String parameter,String size){
        conf.set(parameter,size);
    }
}
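
To override one of these defaults without editing defaultconf(), the existing getters and setters can be combined before the session is built. The sketch below assumes nothing beyond the MySpark class above; SparkConfDemo is a made-up class name and "8g" is just an example value.

SparkConfDemo.class

package com.fromjoker;

import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;

public class SparkConfDemo {
    public static void main(String[] args) {
        MySpark mySpark = new MySpark();
        mySpark.setConf(new SparkConf());
        mySpark.defaultconf();                                 // apply the defaults above
        mySpark.setsparkconf("spark.executor.memory", "8g");   // override a single value
        SparkSession spark = SparkSession.builder()
                .config(mySpark.getConf())
                .enableHiveSupport()
                .getOrCreate();
        System.out.println(spark.conf().get("spark.executor.memory"));
    }
}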
