Spark Streaming on CDH 6.x: A Simple Example

1. Import the jar packages

Add the jars from the lib directory of the Scala installation (together with the Spark jars) to the project's build path.
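For reference, on a parcel-based CDH 6 node the Spark and Scala jars can typically be found at locations like the ones below. This is only a sketch; the parcel path and SCALA_HOME are assumptions that depend on your installation.

# Spark jars shipped with the CDH 6 parcel (path depends on your parcel layout)
ls /opt/cloudera/parcels/CDH/lib/spark/jars/
# Scala library jars under a local Scala installation
ls $SCALA_HOME/lib/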

2. Write a simple word-count program

import org.apache.spark.*;
import org.apache.spark.api.java.function.*;
import org.apache.spark.streaming.*;
import org.apache.spark.streaming.api.java.*;

import scala.Tuple2;
import java.util.Arrays;

public class sparkStreamingUnit {
    public static void main(String[] args) throws Exception {
        System.out.println("Spark Streaming Unit Test!");

        // Create a StreamingContext with a 5-second batch interval.
        // For local testing, switch the master back to "local[2]" (two worker threads).
        //SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("SparkStreamingWordCountDemo");
        SparkConf conf = new SparkConf().setMaster("yarn").setAppName("SparkStreamingWordCountDemo");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));
        jssc.sparkContext().setLogLevel("ERROR");

        // Create a DStream that connects to hostname:port, here 192.168.32.132:9999
        JavaReceiverInputDStream<String> lines = jssc.socketTextStream("192.168.32.132", 9999);
        // Split each line into words
        JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
        // Count each word in each batch
        JavaPairDStream<String, Integer> pairs = words.mapToPair(s -> new Tuple2<>(s, 1));
        JavaPairDStream<String, Integer> wordCounts = pairs.reduceByKey((i1, i2) -> i1 + i2);
        // Print the first ten elements of each RDD generated in this DStream to the console
        wordCounts.print();

        jssc.start();              // Start the computation
        jssc.awaitTermination();   // Wait for the computation to terminate
        jssc.stop();
    }
}

3. Write a submit shell script and upload it to the Spark cluster
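A minimal sketch of such a script, assuming the class above is packaged into a jar; the jar path, resource sizes and deploy mode are placeholders to adjust for your build and cluster:

#!/bin/bash
# submit.sh -- submit the word-count job to YARN
# The jar path and resource sizes below are examples only.
spark-submit \
  --master yarn \
  --deploy-mode client \
  --class sparkStreamingUnit \
  --num-executors 2 \
  --executor-memory 1g \
  --executor-cores 1 \
  /path/to/spark-streaming-demo.jar

Client deploy mode keeps the driver on the submitting host, so the output of wordCounts.print() is visible in the terminal that runs the script.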

4. Run the Spark Streaming job and the netcat program
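For example, assuming netcat is installed on 192.168.32.132 (the host the job connects to), start the listener first and then submit the job:

# On 192.168.32.132: start a netcat server listening on port 9999
nc -lk 9999

# On the submitting host, in another terminal: run the submit script
sh submit.sh

Words typed into the netcat session are counted per 5-second batch and printed on the driver console, e.g. (hello,2).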

The following optional tuning parameters can be set on the SparkConf before the streaming context is created; sparkConf below stands for that instance (conf in the example above). The spark.sql.* entries only affect Spark SQL workloads, and the values are starting points to adjust for your own cluster.

    // CBO (cost-based optimizer) settings
    sparkConf.set("spark.sql.cbo.enabled", "true");
    sparkConf.set("spark.sql.cbo.joinReorder.enabled", "true");
    sparkConf.set("spark.sql.statistics.histogram.enabled", "true");
    // Adaptive query execution (Spark 2.4+)
    sparkConf.set("spark.sql.adaptive.enabled", "true");
    // Consolidate shuffle files (no effect on Spark 2.x, which removed the hash shuffle manager)
    sparkConf.set("spark.shuffle.consolidateFiles", "true");
    // Default parallelism
    sparkConf.set("spark.default.parallelism", "150");
    // Data locality wait time
    sparkConf.set("spark.locality.wait", "6s");
    // Map-side shuffle write buffer
    sparkConf.set("spark.shuffle.file.buffer", "64k");
    // Partition threshold below which the bypass-merge sort shuffle path is used
    sparkConf.set("spark.shuffle.sort.bypassMergeThreshold", "1000");
    // Reduce-side fetch buffer size
    sparkConf.set("spark.reducer.maxSizeInFlight", "48m");
    // Shuffle fetch retry count
    sparkConf.set("spark.shuffle.io.maxRetries", "10");
    // Wait between shuffle fetch retries
    sparkConf.set("spark.shuffle.io.retryWait", "10s");
    // Fraction of memory for shuffle aggregation (deprecated; honored only by the legacy memory manager)
    sparkConf.set("spark.shuffle.memoryFraction", "0.5");
    // Use Kryo serialization
    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    // Automatic repartitioning
    sparkConf.set("spark.sql.auto.repartition", "true");
    // Number of partitions for Spark SQL shuffles
    sparkConf.set("spark.sql.shuffle.partitions", "500");
    // Compress the in-memory columnar cache (codec chosen automatically)
    sparkConf.set("spark.sql.inMemoryColumnarStorage.compressed", "true");
    // Disable automatic inference of partition column types
    sparkConf.set("spark.sql.sources.partitionColumnTypeInference.enabled", "false");
    // Let Spark manage memory via Tungsten
    sparkConf.set("spark.sql.tungsten.enabled", "true");
    // Allow sort to spill to disk
    sparkConf.set("spark.sql.planner.externalSort", "true");
    // Executor heartbeat interval
    sparkConf.set("spark.executor.heartbeatInterval", "60s");
    // Idle timeout for executors holding cached blocks (dynamic allocation)
    sparkConf.set("spark.dynamicAllocation.cachedExecutorIdleTimeout", "120");
    // Broadcast join threshold (100 MB)
    sparkConf.set("spark.sql.autoBroadcastJoinThreshold", "104857600");
    // Max bytes packed into one partition when reading files (256 MB)
    sparkConf.set("spark.sql.files.maxPartitionBytes", "268435456");
    // Estimated cost, in bytes, of opening a file for reading (8 MB)
    sparkConf.set("spark.sql.files.openCostInBytes", "8388608");
    // Max fields shown when converting plans or rows to debug strings
    sparkConf.set("spark.debug.maxToStringFields", "500");
    // Speculative execution
    sparkConf.set("spark.speculation", "true");
    sparkConf.set("spark.speculation.interval", "500");
    sparkConf.set("spark.speculation.quantile", "0.8");
    sparkConf.set("spark.speculation.multiplier", "1.5");

 
