Main idea: the SparkContext is managed by the Spring container; once the container has finished starting, the spark-streaming-kafka job runs, fetching data from Kafka and processing it.
1. Initialize the SparkContext in the Spring container. Code snippet:
@Bean
@ConditionalOnMissingBean(SparkConf.class)
public SparkConf sparkConf() {
    SparkConf conf = new SparkConf()
            .setAppName(sparkAppName)
            .setMaster(sparkMaster)
            .set("spark.driver.memory", sparkDriverMemory)
            .set("spark.worker.memory", sparkWorkerMemory)             // e.g. "26g"
            .set("spark.executor.memory", sparkExecutorMemory)
            .set("spark.rpc.message.maxSize", sparkRpcMessageMaxSize);
    // conf.set("spark.shuffle.memoryFraction", "0");                  // default is 0.2
    // conf.setMaster("local[*]");                                     // for local testing only
    return conf;
}
@Bean
@ConditionalOnMissingBean(JavaSparkContext.class) // by default, only one SparkContext may exist per JVM
public JavaSparkContext javaSparkContext(@Autowired SparkConf sparkConf) {
    return new JavaSparkContext(sparkConf);
}
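The fields referenced above (sparkAppName, sparkMaster, sparkDriverMemory, and so on) are not declared in the snippet; presumably they are injected from application.properties. A minimal sketch of the enclosing configuration class, assuming @Value injection (the property keys and default values below are illustrative, not from the original post):

import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Configuration;

@Configuration
public class SparkConfig {

    // Keys and defaults are illustrative assumptions.
    @Value("${spark.app.name:spark-demo}")
    private String sparkAppName;

    @Value("${spark.master:local[*]}")
    private String sparkMaster;

    @Value("${spark.driver.memory:2g}")
    private String sparkDriverMemory;

    @Value("${spark.worker.memory:2g}")
    private String sparkWorkerMemory;

    @Value("${spark.executor.memory:2g}")
    private String sparkExecutorMemory;

    @Value("${spark.rpc.message.maxSize:128}")
    private String sparkRpcMessageMaxSize;

    // ... the sparkConf() and javaSparkContext() beans shown above go here ...
}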
2. After the Spring container has finished starting, run spark-streaming-kafka to fetch and process the data, as sketched below.
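A minimal sketch of such a startup hook: an ApplicationRunner fires after the Spring context is fully started, reuses the Spring-managed JavaSparkContext, and opens a direct Kafka stream. This assumes the spark-streaming-kafka-0-10 integration; the broker address, consumer group, topic name, and 5-second batch interval are illustrative placeholders:

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.stereotype.Component;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

@Component
public class StreamingStarter implements ApplicationRunner {

    private final JavaSparkContext sparkContext;

    public StreamingStarter(JavaSparkContext sparkContext) {
        this.sparkContext = sparkContext;
    }

    @Override
    public void run(ApplicationArguments args) throws Exception {
        // Reuse the Spring-managed JavaSparkContext for the streaming context.
        JavaStreamingContext ssc = new JavaStreamingContext(sparkContext, Durations.seconds(5));

        Map<String, Object> kafkaParams = new HashMap<>();
        kafkaParams.put("bootstrap.servers", "localhost:9092"); // placeholder broker
        kafkaParams.put("key.deserializer", StringDeserializer.class);
        kafkaParams.put("value.deserializer", StringDeserializer.class);
        kafkaParams.put("group.id", "spark-demo");              // placeholder group id
        kafkaParams.put("auto.offset.reset", "latest");

        JavaInputDStream<ConsumerRecord<String, String>> stream =
                KafkaUtils.createDirectStream(
                        ssc,
                        LocationStrategies.PreferConsistent(),
                        ConsumerStrategies.<String, String>Subscribe(
                                Arrays.asList("demo-topic"), kafkaParams)); // placeholder topic

        // Placeholder processing: print the consumed record values.
        stream.map(ConsumerRecord::value).print();

        ssc.start();
        // awaitTermination() blocks; in a real application, start it on a
        // separate thread or let the runner block intentionally.
        ssc.awaitTermination();
    }
}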