Spark集群安装笔记



# HADOOP VARIABLES START
export JAVA_HOME=/usr/java
export JRE_HOME=$JAVA_HOME/jre
export SCALA_HOME=/usr/scala-2.11.6
export MAVEN_HOME=/usr/apache-maven-3.3.9
export ANT_HOME=/usr/apache-ant-1.9.6
export SPARK_HOME=/usr/hadoop/spark-1.6.1-bin-hadoop2.6
export SPARK_JAR=/usr/hadoop/spark-1.6.1-bin-hadoop2.6/lib/spark-assembly-1.6.1-hadoop2.6.0.jar
export CLASSPATH=$JAVA_HOME/lib:$JAVA_HOME/jre/lib
export HADOOP_HOME=/usr/hadoop/hadoop-2.6.4
export PATH=$PATH:$HADOOP_HOME/bin:$JAVA_HOME/bin:$HADOOP_HOME/sbin:$SCALA_HOME/bin:$SPARK_HOME/bin:$SPARK_HOME/sbin:${MAVEN_HOME}/bin:/softback/sbt/bin:$ANT_HOME/bin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
# HADOOP VARIABLES END


------------ssh免密登录(以下三组命令依次在 192.168.1.30、192.168.1.31、192.168.1.32 上执行,每台机器都要先用 ssh-keygen 生成自己的密钥)------
ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
cat ~/.ssh/id_dsa.pub | ssh root@192.168.1.31 'cat >> ~/.ssh/authorized_keys'
cat ~/.ssh/id_dsa.pub | ssh root@192.168.1.32 'cat >> ~/.ssh/authorized_keys'


cat ~/.ssh/id_dsa.pub | ssh root@192.168.1.30 'cat >> ~/.ssh/authorized_keys'
cat ~/.ssh/id_dsa.pub | ssh root@192.168.1.32 'cat >> ~/.ssh/authorized_keys'


cat ~/.ssh/id_dsa.pub | ssh root@192.168.1.30 'cat >> ~/.ssh/authorized_keys'
cat ~/.ssh/id_dsa.pub | ssh root@192.168.1.31 'cat >> ~/.ssh/authorized_keys'
----------------------------------------------
scp -r /usr/scala-2.11.6 root@192.168.1.31:/usr/scala-2.11.6
scp -r /usr/scala-2.11.6 root@192.168.1.32:/usr/scala-2.11.6


scp -r /etc/profile root@192.168.1.31:/etc/profile
scp -r /etc/profile root@192.168.1.32:/etc/profile




scp -r /usr/hadoop/hadoop-2.6.4 root@192.168.1.31:/usr/hadoop/hadoop-2.6.4
scp -r /usr/hadoop/hadoop-2.6.4 root@192.168.1.32:/usr/hadoop/hadoop-2.6.4


scp -r /usr/hadoop/spark-1.6.1-bin-hadoop2.6 root@192.168.1.31:/usr/hadoop/spark-1.6.1-bin-hadoop2.6
scp -r /usr/hadoop/spark-1.6.1-bin-hadoop2.6 root@192.168.1.32:/usr/hadoop/spark-1.6.1-bin-hadoop2.6


scp -r /usr/java root@192.168.1.31:/usr/java
scp -r /usr/java root@192.168.1.32:/usr/java


scp /etc/hosts root@192.168.1.31:/etc/hosts
scp /etc/hosts root@192.168.1.32:/etc/hosts


scp -r /usr/hadoop/spark-1.6.1-bin-hadoop2.6/conf/spark-env.sh root@192.168.1.31:/usr/hadoop/spark-1.6.1-bin-hadoop2.6/conf/spark-env.sh
scp -r /usr/hadoop/spark-1.6.1-bin-hadoop2.6/conf/spark-env.sh root@192.168.1.32:/usr/hadoop/spark-1.6.1-bin-hadoop2.6/conf/spark-env.sh


--------------------------------------------------------------
mkdir -p /data/hadoop/namenode
mkdir -p /data/hadoop/datanode
mkdir -p /data/hadoop/hadoop_tmp


rm -rf /data/hadoop/namenode/*
rm -rf /data/hadoop/datanode/*
rm -rf /data/hadoop/hadoop_tmp/*


------------------------------
-----core-site.xml--------------------------
<configuration>
<!-- file system properties -->
<property>
        <name>fs.defaultFS</name>
        <value>hdfs://master:9000</value>
</property>
<property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
</property>
<property>
        <name>hadoop.tmp.dir</name>
        <value>/data/hadoop/hadoop_tmp</value>
        <description>A base for other temporary directories.</description>
</property>
<property>
        <name>hadoop.proxyuser.spark.hosts</name>
        <value>*</value>
</property>
<property>
        <name>hadoop.proxyuser.spark.groups</name>
        <value>*</value>
</property>
</configuration>


-------hdfs-site.xml------
<configuration>
<property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>master:9001</value>
</property>
<property>
        <name>dfs.namenode.name.dir</name>
        <value>/data/hadoop/namenode</value>
</property>
<property>
        <name>dfs.datanode.data.dir</name>
        <value>/data/hadoop/datanode</value>
</property>
<property>
        <name>dfs.replication</name>
        <value>1</value>
</property>
<property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
</property>
<property>
        <name>dfs.datanode.du.reserved</name>
        <value>10240000000</value>
</property>
</configuration>




-----------yarn-site.xml--------------------------------
<!-- Site specific YARN configuration properties -->
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>master:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>master:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>master:8035</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address</name>
        <value>master:8033</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>master:8088</value>
    </property>
</configuration>
----------mapred-site.xml----------------------------------------------------
<configuration>
<property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
</property>
<property>
        <name>mapreduce.jobhistory.address</name>
        <value>master:10020</value>
</property>
<property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>master:19888</value>
</property>
</configuration>
---------------------------------------------------------------------
hdfs namenode -format
---------启动HDFS------------------
start-dfs.sh
-------------------------------------------


-----------启动YARN-----------
start-yarn.sh
--------------spark spark-env.sh文件 在最下面增加------------------------------
----------
JAVA_HOME=/usr/java
SCALA_HOME=/usr/scala-2.11.6
SPARK_MASTER_IP=master
SPARK_MASTER_PORT=7077
SPARK_WORKER_CORES=1
SPARK_WORKER_MEMORY=1g
SPARK_WORKER_PORT=7078
SPARK_WORKER_WEBUI_PORT=8081
SPARK_WORKER_INSTANCES=1
HADOOP_CONF_DIR=/usr/hadoop/hadoop-2.6.4/etc/hadoop


SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=hdfs://master:9000/spark/history"
------spark-defaults.conf----------------------------------------------------------------------------
spark.master                     spark://master:7077
spark.eventLog.enabled           true
spark.eventLog.dir               hdfs://master:9000/spark/history
spark.eventLog.compress           true
------------------------------------------------------------------------------------------------------
export MAVEN_OPTS="-Xms256m -Xmx512m"
mvn clean package -DskipTests -Pdist,native -Dtar


yum -y install gcc*
yum -y install cmake
yum -y install glibc-headers
yum -y install gcc-c++
yum -y install zlib-devel
yum -y install openssl-devel
yum -y install svn
yum -y install git
yum -y install ncurses-devel 
yum -y install lzo-devel
yum -y install autoconf
yum -y install libtool
yum -y install automake
----------配置jdk 指向-----------------
which java
alternatives --install /usr/bin/java java /usr/java/bin/java 300
alternatives --config java
------------
/softback/hadoop-2.6.4-src/hadoop-dist/target/hadoop-2.6.4.tar.gz
--------------启动 spark---------------
spark-shell --executor-memory 1g --driver-memory 1g --master spark://master:7077


spark-shell --master local[2]


--------------------测试-------------------------
val rdd = sc.textFile("hdfs://192.168.1.30:9000/spark/test.txt")
rdd.cache()
val wordcount = rdd.flatMap(_.split(" ")).map(x=>(x,1)).reduceByKey(_+_)
wordcount.take(10)
val wordsort = wordcount.map(x=>(x._2,x._1)).sortByKey(false).map(x=>(x._2,x._1))
wordsort.take(10)


--------------------------
http://plugins.jetbrains.com/files/1347/25886/scala-intellij-bin-3.0.6.zip
------------web 访问地址-------------------
http://master:8080
http://master:4040
----------hdfs访问地址--------------------------------
http://master:50070
-------------spark history执行历史查询---------------------------------------------
http://master:18080
-------------------------------------------------------------------------------
-------------------idea spark 项目导入----------------------
VCS→Check out from Version Control→Git
https://github.com/apache/spark.git
---------------------------
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值