Standalone Spark installation, WordCount example, IDEA implementation, and Jar export/usage: learning notes

 Standalone Spark installation

[root@linux01 install]# tar zxf spark-2.4.5-bin-hadoop2.6.tgz -C ../soft
[root@linux01 soft]# mv spark-2.4.5-bin-hadoop2.6 spark245
[root@linux01 conf]# cp spark-env.sh.template spark-env.sh
[root@linux01 conf]# vi ./spark-env.sh
Append the following at the end of the file:
export JAVA_HOME=/opt/soft/jdk180
export SCALA_HOME=/opt/soft/scala211
export SPARK_HOME=/opt/soft/spark245
export HADOOP_INSTALL=/opt/soft/hadoop260
export HADOOP_CONF_DIR=$HADOOP_INSTALL/etc/hadoop
export SPARK_MASTER_IP=linux01
export SPARK_DRIVER_MEMORY=2G
export SPARK_EXECUTOR_MEMORY=2G
export SPARK_LOCAL_DIRS=/opt/soft/spark245

[root@linux01 ~]# vi /etc/profile
Append the following at the end:
#scala
export SCALA_HOME=/opt/soft/scala211
export PATH=$PATH:$SCALA_HOME/bin
#spark
export SPARK_HOME=/opt/soft/spark245
export PATH=$PATH:$SPARK_HOME/bin
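After saving, run source /etc/profile (or open a new shell) so the new SCALA_HOME and SPARK_HOME entries on PATH take effect.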

[root@linux01 conf]# cd /opt/soft/spark245/sbin/
[root@linux01 sbin]# ./start-all.sh 

[root@linux01 ~]# spark-shell

 WordCount example

Methods available on an RDD (e.g. via tab completion in spark-shell):
++                    countByValue            histogram                partitions           saveAsTextFile   toLocalIterator   
aggregate             countByValueApprox      id                       persist              setName          toString          
barrier               dependencies            intersection             pipe                 sortBy           top               
cache                 distinct                isCheckpointed           popStdev             sparkContext     treeAggregate     
canEqual              filter                  isEmpty                  popVariance          stats            treeReduce        
cartesian             first                   iterator                 preferredLocations   stdev            union             
checkpoint            flatMap                 keyBy                    productArity         subtract         unpersist         
coalesce              fold                    localCheckpoint          productElement       sum              variance          
collect               foreach                 map                      productIterator      sumApprox        zip               
collectAsync          foreachAsync            mapPartitions            productPrefix        take             zipPartitions     
compute               foreachPartition        mapPartitionsWithIndex   randomSplit          takeAsync        zipWithIndex      
context               foreachPartitionAsync   max                      reduce               takeOrdered      zipWithUniqueId   
copy                  getCheckpointFile       mean                     repartition          takeSample                         
count                 getNumPartitions        meanApprox               sample               toDF                               
countApprox           getStorageLevel         min                      sampleStdev          toDS                               
countApproxDistinct   glom                    name                     sampleVariance       toDebugString                      
countAsync            groupBy                 partitioner              saveAsObjectFile     toJavaRDD 
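
A few of these can be tried straight away on a tiny RDD. The following is a minimal sketch in spark-shell, assuming only the sc object the shell provides; the values in the comments are what these calls return for this sample data:

scala> val nums = sc.parallelize(1 to 10, 2)   // RDD[Int] with 2 partitions
scala> nums.count()                            // 10
scala> nums.take(3)                            // Array(1, 2, 3)
scala> nums.glom().collect()                   // Array(Array(1, 2, 3, 4, 5), Array(6, 7, 8, 9, 10))
scala> nums.aggregate(0)(_ + _, _ + _)         // 55 (per-partition sums, then combined)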




Interactive shell (three ways to launch spark-shell):
[root@linux01 conf]# spark-shell --master local[*]
[root@linux01 conf]# spark-shell --master spark://linux01:7077
[root@linux01 conf]# spark-shell --master yarn --deploy-mode client   # "yarn-client" as a master URL is deprecated since Spark 2.0
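
Inside any of these shells, sc is already defined; a quick way to confirm which master the session is actually using (a small sketch, not specific to this cluster):

scala> sc.master               // e.g. "local[*]" or "spark://linux01:7077"
scala> sc.uiWebUrl             // web UI of this application, typically on port 4040
scala> sc.defaultParallelism   // default partition count for operations like parallelize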


scala> sc.textFile("hdfs://linux01:9000/sparktmp/hello.txt")

scala> sc.textFile("hdfs://linux01:9000/sparktmp/hello.txt").flatMap(x=>x.split(" "))

scala> sc.textFile("hdfs://linux01:9000/sparktmp/hello.txt").flatMap(x=>x.split(" "))
.map(x=>(x,1)).reduceByKey((x,y)=>x+y)

scala> sc.textFile("hdfs://linux01:9000/sparktmp/hello.txt").flatMap(x=>x.split(" "))
.map(x=>(x,1)).reduceByKey((x,y)=>x+y)
.collect.foreach(println)
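
Putting the same chain together, the counts can also be sorted and written back to HDFS instead of being printed. A sketch assuming the same hello.txt input; the output directory (wc_out here is an arbitrary name) must not exist beforehand:

scala> sc.textFile("hdfs://linux01:9000/sparktmp/hello.txt")
         .flatMap(_.split(" "))
         .map((_, 1))
         .reduceByKey(_ + _)
         .sortBy(_._2, ascending = false)                        // most frequent words first
         .saveAsTextFile("hdfs://linux01:9000/sparktmp/wc_out")  // fails if wc_out already exists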

 WordCount example in IDEA

Locate log4j.properties and place it under the project's resources directory.

SparkDemo.scala:
package cn.kgc.kb15

import java.io.FileInputStream
import java.util.Properties

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import org.apache.spark.{SparkConf, SparkContext}

object SparkDemo {
  def main(args: Array[String]): Unit = {
    val conf:SparkConf=new SparkConf().setMaster("local[*]").setAppName("sparkDemo")
    val sc:SparkContext=SparkContext.getOrCreate(conf)
    println(sc)
    val spark:SparkSession=SparkSession.builder().master("local[*]").appName("sparkDemo").getOrCreate()
    println(spark)

//    val rdd:RDD[String]=sc.textFile("hdfs://192.168.111.131:9000/sparktmp/hello.txt")
//    println("-------------------------------rdd")
//    rdd.collect.foreach(println)
//    val rdd2:RDD[String]=rdd.flatMap(x=>x.split(" "))
//    println("-------------------------rdd2")
//    rdd2.collect().foreach(println)
//    val rdd3:RDD[(String,Int)]=rdd2.map(x=>(x,1))
//    println("----------------------------rdd3")
//    rdd3.collect().foreach(println)
//    val rdd4:RDD[(String,Int)]=rdd3.reduceByKey((x,y)=>x+y)
//    println("---------------------------rdd4")
//    rdd4.collect().foreach(println)

    // create an RDD from a local file via sc
//    val rdd11:RDD[String]=sc.textFile("E:\\创建工程\\sparkStu2021_12_06\\in\\helloworld.txt")
//    rdd11.collect().foreach(println)
//    println("-------------------------------------------")
//    rdd11.flatMap(x=>x.split(" ")).map((_,1)).reduceByKey(_+_).collect().foreach(println)

//    val rdd:RDD[Int]=sc.parallelize(1 to 10)
//    val rdd1:RDD[String]=sc.parallelize(Array("hello java","hello hive","hello spark"))
//    val rdd2:RDD[Int]=sc.makeRDD(Array(1,2,3,4))
//    rdd.collect().foreach(println)
//    rdd1.collect().foreach(println)
//    rdd2.collect().foreach(println)


//    val path=Thread.currentThread().getContextClassLoader.getResource("test.properties").getPath
//    println(path)

    val prop =new Properties()
    prop.load(new FileInputStream(Consol.Target_File))
    val target=prop.getProperty(Consol.PATH)
    println(target)
    val rdd11:RDD[String]=sc.textFile(target)

    /**
      * public static String Target_File="/opt/sparktmp/test.properties";
      *
      * public static String PATH="path";
      */
    rdd11.flatMap(x=>x.split(" ")).map((_,1)).reduceByKey(_+_).collect().foreach(println)

//


//    val prop =new Properties()
//    prop.load(new FileInputStream("/opt/sparktmp/test.properties"))
//    val target=prop.getProperty("path")
//    println(target)
//    val rdd11:RDD[String]=sc.textFile(target)
//    rdd11.flatMap(x=>x.split(" ")).map((_,1)).reduceByKey(_+_).collect().foreach(println)

//val prop =new Properties()
//    prop.load(new FileInputStream("E:\\创建工程\\sparkStu2021_12_06\\resources\\test.properties"))
//    val target=prop.getProperty("path")
//    println(target)
//    val rdd11:RDD[String]=sc.textFile(target)
//    rdd11.flatMap(x=>x.split(" ")).map((_,1)).reduceByKey(_+_).collect().foreach(println)




  }

}

Consol.java:
package cn.kgc.kb15;

public class Consol {
    /**
     * the properties file that the Spark job reads to obtain its input path
     */
    public static String Target_File="/opt/sparktmp/test.properties";

    public static String PATH="path";
}
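
As a variant of the fixed /opt/sparktmp path used above (and hinted at by the commented-out getContextClassLoader line), the properties file can also be loaded from the classpath so that it travels inside the jar. A sketch that could replace the Properties block in SparkDemo.main, assuming test.properties with a path entry is kept under src/main/resources; it reuses the sc, Properties and RDD imports already present there:

    // classpath lookup instead of a fixed server path
    val in = Thread.currentThread().getContextClassLoader.getResourceAsStream("test.properties")
    val prop2 = new Properties()
    prop2.load(in)                            // read key=value pairs from the packaged resource
    val target2 = prop2.getProperty("path")   // e.g. an hdfs:// or local input path
    val words: RDD[String] = sc.textFile(target2)
    words.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).collect().foreach(println)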

 Exporting the Jar

 

spark-submit --class cn.kgc.kb15.SparkDemo --master local[*] ./<your-app>.jar
spark-submit --class cn.kgc.kb15.SparkDemo --master spark://linux01:7077 ./<your-app>.jar
spark-submit --class cn.kgc.kb15.SparkDemo --master yarn --deploy-mode client ./<your-app>.jar


Output:
(scala,1)
(are,1)
(you,1)
(hive,1)
(how,1)
(hello,5)
(java,1)
(hbase,1)
(world,1)
