现在,网上基于Spark的代码基本上都是用Scala写的,很多书上的示例也都是基于Scala的。没办法,谁叫Spark是用Scala写出来的呢。但是我现在还没有系统地学习Scala,所以只能用Java写Spark程序了。Spark支持Java,而且Scala也是基于JVM的。不说了,直接上代码。
这是官网上给出的例子,也是大数据学习中的经典案例:单词计数。
先在Linux下打开一个终端,输入:$ nc -lk 9999
然后运行下面的代码:
package com.tg.spark.stream;
import java.util.Arrays;
import org.apache.spark.*;
import org.apache.spark.api.java.function.*;
import org.apache.spark.streaming.*;
import org.apache.spark.streaming.api.java.*;
import scala.Tuple2;
/**
*
* @author 汤高
*
*/
public class SparkStream {
public static void main(String[] args) {
// Create a local StreamingContext with four worker threads (local[4]) and a
// batch interval of 1 second
SparkConf conf = new SparkConf().setMaster("local[4]").setAppName("NetworkWordCount").set("spark.testing.memory",
"2147480000");
JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(1));
System.out.println(jssc);
// Create a DStream that will connect to hostname:port, like
// localhost:9999
JavaReceiverInputDStream<String> lines = jssc.socketTextStream("master"