【README】
安装软件清单(仅用于本机调试;若不提交到集群、也不写 Scala,可以直接跳过安装):
- scala;
- hadoop;
- winutils;
- spark;
【1】 本地环境搭建(非常简单)
1)新增 spark maven 依赖:(参见: https://mvnrepository.com/artifact/org.apache.spark/spark-core)
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.13</artifactId>
<version>3.5.0</version>
</dependency>
2)新增 VM 参数:-Dspark.master=local
(或者在代码里调用 new SparkConf().setMaster("local"),效果相同)
3)wordcount例子1:
public class SparkExample01 {
    /** Input text file; each line holds space-separated words. */
    private static final String FILE_PATH = "src/main/resources/helloworld.txt";

    /**
     * Word-count demo: reads {@link #FILE_PATH}, splits each line on single
     * spaces, counts occurrences per word and prints "word-count" pairs.
     */
    public static void main(String[] args) {
        SparkConf sparkCfg = new SparkConf().setMaster("local").setAppName("sparkCfg");
        // FIX: JavaSparkContext implements Closeable; the original never
        // stopped it, leaking the local context (UI port, shutdown hooks).
        try (JavaSparkContext javaSparkContext = new JavaSparkContext(sparkCfg)) {
            JavaRDD<String> lineRdd = javaSparkContext.textFile(FILE_PATH);
            // flatMap expects an Iterator, hence .iterator() after the split
            JavaRDD<String> wordRdd =
                    lineRdd.flatMap(line -> Arrays.stream(line.split(" ")).iterator());
            JavaPairRDD<String, Integer> wordCountRdd =
                    wordRdd.mapToPair(word -> new Tuple2<>(word, 1)).reduceByKey(Integer::sum);
            // foreach runs on executors; with master=local the output lands on this console
            wordCountRdd.foreach(wordCount -> System.out.println(wordCount._1() + "-" + wordCount._2()));
        }
    }
}
【2】 wordcount 例子2
public class SparkWordCounter {
    /** Input text file with space-separated words. */
    private static final String inputFileName = "D:\\temp\\spark\\input.txt";
    /** Output directory; Spark writes part-* files into it and requires it to NOT exist. */
    private static final String outputFileName = "D:\\temp\\sparkoutput\\";

    public static void main(String[] args) {
        // winutils workaround so Hadoop file I/O works when running locally on Windows
        System.setProperty("hadoop.home.dir", "D://software_install_dir//winutils-master//hadoop-3.2.0//");
        // BUG FIX: File.delete() cannot remove a non-empty directory, so after a
        // first run the stale output dir made saveAsTextFile fail with
        // "output directory already exists". Delete the tree recursively instead.
        deleteRecursively(new File(outputFileName));
        wordCount();
    }

    /**
     * Reads {@link #inputFileName}, counts word occurrences, prints the number
     * of distinct words and saves the (word, count) pairs to {@link #outputFileName}.
     */
    public static void wordCount() {
        SparkConf sparkConf = new SparkConf().setMaster("local").setAppName("sparkCore3.0");
        // try-with-resources: stop the context even if an action throws
        try (JavaSparkContext javaSparkContext = new JavaSparkContext(sparkConf)) {
            JavaRDD<String> fileRdd = javaSparkContext.textFile(inputFileName);
            JavaRDD<String> fileWordRdd =
                    fileRdd.flatMap(line -> Arrays.asList(line.split(" ")).iterator());
            // Integer::sum — consistent with example 1's reduceByKey
            JavaPairRDD<String, Integer> fileWordPairRdd =
                    fileWordRdd.mapToPair(word -> new Tuple2<>(word, 1)).reduceByKey(Integer::sum);
            System.out.println("【打印】" + fileWordPairRdd.count());
            fileWordPairRdd.saveAsTextFile(outputFileName);
        }
    }

    /** Recursively deletes {@code f} (file or directory); no-op if it does not exist. */
    private static void deleteRecursively(File f) {
        File[] children = f.listFiles(); // null for plain files / missing paths
        if (children != null) {
            for (File child : children) {
                deleteRecursively(child);
            }
        }
        f.delete();
    }
}