Using Spark Streaming to implement a word count over a directory in the HDFS file system. Spark Streaming can monitor an HDFS directory via textFileStream and apply the usual word-count transformations to each new file that lands in it.
The code is as follows:
package sparkTestJava;
import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import scala.Tuple2;
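// Streaming word count: watch an HDFS directory and count the words in newly arrived files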
public class HDFSWordCount {
public static void main(String[] args) {
SparkConf conf = new SparkConf().setAppName("wordcount"