1. Install Scala and Spark
Apache mirror for downloads: http://www-eu.apache.org/dist/
tar -zxvf scala-2.11.0.tgz -C ~/software/
vi ~/.bashrc
export SCALA_HOME=/home/jarvanl/software/scala-2.11.0
export PATH=$PATH:$SCALA_HOME/bin
export SPARK_HOME=/home/jarvanl/software/spark-2.0.0-bin-hadoop2.7
export PATH=$PATH:$SPARK_HOME/bin
source ~/.bashrc
If spark-shell reports an error on startup (typically a hostname/binding problem), add the following to conf/spark-env.sh:
export SPARK_MASTER_IP=127.0.0.1
export SPARK_LOCAL_IP=127.0.0.1
Start:
./bin/spark-shell --master local[2]
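Once the shell is up, a quick sanity check (sc is the SparkContext the shell creates automatically):
sc.version                       // Spark version, e.g. 2.0.0
util.Properties.versionString    // Scala version bundled with Spark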
Test with a word count:
var rdd=sc.textFile("file:///home/jarvanl/tmp01/b.txt")
var wordcount = rdd.flatMap(x => x.split(" ")).map(x => (x,1)).reduceByKey((a,b) => a+b)
wordcount.collect()
var wordsort=wordcount.sortByKey(false).collect()  // sorts by key (the word) in descending order
val wordcount=rdd.flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_)  // same job in point-free style
wordcount.collect()
val wordsort = wordcount.map(x => (x._2,x._1)).sortByKey(false).map(x => (x._2,x._1))  // sort by count, descending
wordsort.collect()
wordcount.saveAsTextFile("file:///home/jarvanl/tmp01/out")
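Note that saveAsTextFile writes a directory of part files, so file:///home/jarvanl/tmp01/out must not already exist. Also, the swap-sort-swap pattern above can be replaced with RDD.sortBy, which sorts by the count directly (a sketch producing the same ordering):
val wordsortAlt = wordcount.sortBy(_._2, ascending = false)  // sort by the count field
wordsortAlt.collect()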
val num=sc.parallelize(1 to 10)
val doublenum = num.map(_*2)
val threenum = doublenum.filter(_ % 3 == 0)
threenum.collect
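map and filter are lazy transformations; nothing actually runs until an action such as collect, count, or reduce is invoked. Two more actions on the same RDDs (assuming the same shell session):
num.reduce(_ + _)  // sum of 1..10 = 55
threenum.count()   // multiples of 3 among 2,4,...,20 -> 3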
2. Install Zeppelin
tar -zxvf zeppelin-0.6.2-bin-all.tgz -C ~/software/
Commands to start and stop the Zeppelin daemon:
bin/zeppelin-daemon.sh start
bin/zeppelin-daemon.sh stop
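The same script also accepts status and restart subcommands (e.g. bin/zeppelin-daemon.sh status); exact options may vary by Zeppelin version.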
Set the environment variable in conf/zeppelin-env.sh:
export SPARK_HOME=/home/jarvanl/software/spark-2.0.0-bin-hadoop2.7
Web UI:
http://localhost:8080/
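The port defaults to 8080; it can be changed via zeppelin.server.port in conf/zeppelin-site.xml (or ZEPPELIN_PORT in conf/zeppelin-env.sh).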
Prefix a paragraph with %md, %sh, %sql, %spark, %hive, or %tajo to choose which interpreter runs it; with no prefix, the default execution environment is Scala.
Detailed parameter descriptions are available on the http://127.0.0.1:8080/#/interpreter page.
%spark
val num=sc.parallelize(1 to 10)
val doublenum = num.map(_*2)
val threenum = doublenum.filter(_ % 3 == 0)
threenum.collect
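To feed results from a %spark paragraph into a %sql paragraph, register the data as a temp view first. A minimal sketch for Spark 2.x (the view name words and column names are arbitrary, reusing the word-count example from section 1):
%spark
import spark.implicits._                                         // for .toDF (may already be in scope)
val rdd = sc.textFile("file:///home/jarvanl/tmp01/b.txt")
val wordcount = rdd.flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_)
wordcount.toDF("word", "cnt").createOrReplaceTempView("words")   // expose to the %sql interpreter

%sql
select word, cnt from words order by cnt desc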