1. Install Spark in local mode and configure a test run (example on Ubuntu)
# Open the profile to configure the environment variables for JAVA, SCALA, SPARK, HADOOP, and SBT
gedit /etc/profile
# Add the following lines to the file
export JAVA_HOME=/usr/java/jdk1.8.0_51
export PATH=$JAVA_HOME/bin:$PATH
export CLASSPATH=$CLASSPATH:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export SCALA_HOME=/usr/scala/scala-2.11.7
export PATH=$SCALA_HOME/bin:$PATH
export SPARK_HOME=/usr/spark/spark-1.4.1-bin-without-hadoop
export PATH=$SPARK_HOME/bin:$PATH
export SBT_HOME=/usr/scala/sbt
export PATH=$SBT_HOME/bin:$PATH
export HADOOP_HOME=/usr/hadoop/hadoop-2.7.0
export PATH=$HADOOP_HOME/bin:$PATH
export CLASSPATH=$CLASSPATH:$HADOOP_HOME/lib
# Reload the profile so the changes take effect
source /etc/profile
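To confirm that the variables are picked up in a new shell, the usual version checks can be run (the expected output assumes the versions installed above):
java -version     # should report 1.8.0_51
scala -version    # should report 2.11.7
hadoop version    # should report 2.7.0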
Next, edit Spark's configuration file spark-env.sh: rename spark-env.sh.template (in $SPARK_HOME/conf) to spark-env.sh as shown below, then add the environment and classpath variables that follow.
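A minimal sketch of the rename step, using the SPARK_HOME set above:
cd $SPARK_HOME/conf
cp spark-env.sh.template spark-env.sh
# Now append the following lines to the new spark-env.sh: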
export SCALA_HOME=/usr/scala/scala-2.11.7
export JAVA_HOME=/usr/java/jdk1.8.0_51
export HADOOP_CONF_DIR=/usr/hadoop/hadoop-2.7.0/etc/hadoop
export SPARK_LOCAL_IP=localhost
export SPARK_PUBLIC_DNS=localhost
export SPARK_CLASSPATH=${HADOOP_HOME}/share/hadoop/common/hadoop-common-2.7.0.jar:${HADOOP_HOME}/share/hadoop/common/hadoop-nfs-2.7.0.jar
export SPARK_CLASSPATH=${SPARK_CLASSPATH}:${HADOOP_HOME}/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar:${HADOOP_HOME}/share/hadoop/common/lib/slf4j-api-1.7.10.jar:${HADOOP_HOME}/share/hadoop/common/lib/log4j-1.2.17.jar:${HADOOP_HOME}/share/hadoop/common/lib/commons-configuration-1.6.jar:${HADOOP_HOME}/share/hadoop/common/lib/commons-collections-3.2.1.jar:${HADOOP_HOME}/share/hadoop/common/lib/guava-11.0.2.jar:${HADOOP_HOME}/share/hadoop/common/lib/commons-lang-2.6.jar:${HADOOP_HOME}/share/hadoop/common/lib/hadoop-auth-2.7.0.jar:${HADOOP_HOME}/share/hadoop/common/lib/jetty-6.1.26.jar
export SPARK_CLASSPATH=${SPARK_CLASSPATH}:${HADOOP_HOME}/share/hadoop/common/lib/jersey-server-1.9.jar:${HADOOP_HOME}/share/hadoop/common/lib/jersey-core-1.9.jar:${HADOOP_HOME}/share/hadoop/common/lib/jersey-json-1.9.jar:${HADOOP_HOME}/share/hadoop/common/lib/snappy-java-1.0.4.1.jar
export SPARK_CLASSPATH=${SPARK_CLASSPATH}:${HADOOP_HOME}/share/hadoop/mapreduce/hadoop-mapreduce-client-common-2.7.0.jar
export SPARK_CLASSPATH=${SPARK_CLASSPATH}:${SPARK_HOME}/lib/spark-assembly-1.4.1-hadoop2.2.0.jar:${SPARK_HOME}/lib/spark-1.4.1-yarn-shuffle.jar:${SPARK_HOME}/lib/spark-examples-1.4.1-hadoop2.2.0.jar
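A single mistyped jar path in SPARK_CLASSPATH will make spark-shell fail at startup, so it is worth checking that every entry exists. A minimal sketch, run after sourcing the config file:
source $SPARK_HOME/conf/spark-env.sh
# Split the classpath on ':' and report any entry that is not a real file
echo "$SPARK_CLASSPATH" | tr ':' '\n' | while read jar; do
  [ -e "$jar" ] || echo "missing: $jar"
done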
Once ./bin/spark-shell is run and the scala> prompt appears, Spark has started successfully in local mode.
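Beyond the REPL prompt, a quick end-to-end check is to run one of the bundled examples via the run-example script that ships with the distribution:
cd $SPARK_HOME
./bin/run-example SparkPi 10    # should print a line like "Pi is roughly 3.14..."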
2. Running Spark commands from R to process files
library(SparkR)
# Create a new SparkContext
sc <- sparkR.init(master="local")
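Note that library(SparkR) only succeeds if R can find the SparkR package bundled under $SPARK_HOME/R/lib. The simplest way to guarantee that is the launcher script shipped with the distribution, which starts an R session with SparkR already on the library path:
# Start an interactive R session preconfigured for SparkR
$SPARK_HOME/bin/sparkR --master local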