package com.wcount

import java.io.{File, PrintWriter}

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object ScalaWordCount {

  def main(args: Array[String]): Unit = {
    /**
     * SparkConf: holds the configuration of the Spark application.
     * setMaster: selects the execution mode:
     *   local      - local mode, typically used for testing
     *   standalone - Spark's built-in cluster resource scheduler
     *   yarn       - Hadoop's YARN resource manager
     *   mesos      - the Mesos resource-scheduling framework
     * setAppName: sets the application name.
     */
    val conf = new SparkConf().setMaster("local").setAppName("workJob")

    /**
     * SparkContext: the Spark application's context and the single entry point to the cluster.
     */
    val sc = new SparkContext(conf)
    // val session: SparkSession = SparkSession.builder.appName("wc").master("local").getOrCreate()

    // Read the input file; each element of the RDD is one line of text.
    val lines: RDD[String] = sc.textFile("./data/wc.txt")

    // Split every line into words.
    val words: RDD[String] = lines.flatMap(line => {
      println("flatmap...........")
      line.split(" ")
    })

    // Pair each word with an initial count of 1.
    val tuple: RDD[(String, Int)] = words.map(word => {
      println("map............")
      (word, 1)
    })

    // Sum the counts per word.
    val result: RDD[(String, Int)] = tuple.reduceByKey((v1: Int, v2: Int) => v1 + v2)
    // result.foreach(println)

    // Write the counts to a file. The results are collected to the driver first;
    // mutating a driver-side variable from inside an RDD foreach only appears to work in local mode.
    val outWriter = new PrintWriter(new File("./data/out.txt"))
    for ((word, count) <- result.collect()) {
      val wt = word + ":" + count + " "
      println(wt)
      outWriter.print(wt)
    }
    outWriter.close()

    // Uncomment to keep the driver alive so the Spark web UI (http://localhost:4040) can be inspected.
    // while (true) {}

    // sc.textFile("./data/wc").flatMap(line => {line.split(" ")}).map(word => {new Tuple2(word, 1)}).reduceByKey((v1: Int, v2: Int) => v1 + v2).foreach(println)

    sc.stop()
  }
}
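
// A minimal sketch of the same word count expressed through the SparkSession entry point that the
// commented-out builder line above hints at. The object name ScalaWordCountSession is made up for
// illustration, and running it assumes the spark-sql artifact is on the classpath; the input path,
// app name, and master setting are carried over from the example above.
object ScalaWordCountSession {

  import org.apache.spark.sql.SparkSession

  def main(args: Array[String]): Unit = {
    val session = SparkSession.builder.appName("wc").master("local").getOrCreate()

    // A SparkSession wraps a SparkContext, so the same RDD API remains available.
    val counts = session.sparkContext
      .textFile("./data/wc.txt")
      .flatMap(line => line.split(" "))
      .map(word => (word, 1))
      .reduceByKey(_ + _)

    counts.collect().foreach { case (word, count) => println(s"$word:$count") }

    session.stop()
  }
}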