(1) Dependencies:
libssl-dev zlib1g-dev openssl protobuf 2.5 maven jdk1.7 cmake openssh-server
(2) Build Hadoop:
mvn clean package -DskipTests -Pdist,native,docs
(the docs profile is optional; include it only if you need the documentation.)
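Before building, it is worth confirming the toolchain matches what the native build expects; in particular the Hadoop 2.x native profile requires protoc 2.5.0 exactly. A quick sanity check, assuming the packages above are installed:
protoc --version    # should print libprotoc 2.5.0
java -version       # should report a 1.7 JDK
mvn -version
cmake --version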
(3) Configuration files:
1. core-site.xml:
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hostname:9000</value>
  </property>
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>file:/home/hadoop/hadoop-2.5.1/temp</value>
    <description>A base for other temporary directories.</description>
  </property>
  <property>
    <name>hadoop.proxyuser.hduser.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hduser.groups</name>
    <value>*</value>
  </property>
</configuration>
2. hdfs-site.xml:
<configuration>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>hostname:9001</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/home/hadoop/hadoop-2.5.1/dfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/home/hadoop/hadoop-2.5.1/dfs/data</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
</configuration>
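The local paths referenced in core-site.xml and hdfs-site.xml (hadoop.tmp.dir, dfs.namenode.name.dir, dfs.datanode.data.dir) should exist and be writable by the hadoop user; creating them up front avoids permission surprises at first start. A sketch, assuming the hadoop-2.5.1 layout used above:
mkdir -p /home/hadoop/hadoop-2.5.1/temp
mkdir -p /home/hadoop/hadoop-2.5.1/dfs/name
mkdir -p /home/hadoop/hadoop-2.5.1/dfs/data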
3. mapred-site.xml:
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>hostname:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hostname:19888</value>
  </property>
</configuration>
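The job history addresses configured here belong to a separate daemon that is not started by the HDFS or YARN scripts. Once the cluster is up (steps (4) and (5) below), it can be started with the stock Hadoop 2.x script, assuming HADOOP_HOME points at the install directory:
$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver
The web UI should then answer on hostname:19888 as configured above.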
4. yarn-site.xml:
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>hostname:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>hostname:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>hostname:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>hostname:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>hostname:8088</value>
  </property>
</configuration>
5. hadoop-env.sh
export JAVA_HOME=/usr/java/jdk1.7.0_67
6. yarn-env.sh
export JAVA_HOME=/usr/java/jdk1.7.0_67
7. After editing the configuration files, copy (overwrite) all of them into the hadoop directory on every node (the .ssh directory must have permission 700); a scripted variant for several nodes follows below:
ssh-keygen -t rsa
cat id_rsa.pub >> authorized_keys
or: ssh-copy-id -i ~/.ssh/id_rsa.pub username@serverIP
scp hadoop-env.sh hdfs-site.xml mapred-site.xml slaves yarn-env.sh yarn-site.xml \
    hadoop@hostname:/home/hadoop/hadoop-2.5.1/etc/hadoop/
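With more than a couple of nodes this is tedious by hand; a minimal sketch of scripting it, assuming a hypothetical nodes file with one hostname per line and the same file list as above:
for node in $(cat nodes); do
    ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop@$node
    scp hadoop-env.sh hdfs-site.xml mapred-site.xml slaves yarn-env.sh yarn-site.xml \
        hadoop@$node:/home/hadoop/hadoop-2.5.1/etc/hadoop/
done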
(4) Format the namenode:
hdfs namenode -format
(5) Start HDFS:
hadoop/sbin/start-dfs.sh
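Before moving on, it is worth confirming that the daemons actually came up, and, since mapred-site.xml points MapReduce at YARN, starting YARN as well. A quick check, assuming the stock Hadoop 2.x sbin scripts:
jps                        # NameNode, SecondaryNameNode and DataNode should be listed
hdfs dfsadmin -report      # every datanode should report in
hadoop/sbin/start-yarn.sh  # ResourceManager/NodeManagers, needed for MapReduce on YARN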
(6) HBase:
1. hbase-site.xml:
<configuration>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://master:9000/hbase</value>
  </property>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.master</name>
    <value>hdfs://master:6000</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>master,slave1</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/home/hadoop/opt/zookeeper-3.4.6</value>
  </property>
</configuration>
2. Edit regionservers:
master
slave1
3. hbase-env.sh:
export HBASE_MANAGES_ZK=true   # let HBase manage its own ZooKeeper
4. Start HBase (note: with HBASE_MANAGES_ZK=true, start-hbase.sh starts ZooKeeper itself, so the separate zkServer.sh start is only needed when ZooKeeper is managed externally):
./zkServer.sh start
./start-hbase.sh
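A quick way to confirm the cluster registered its region servers is the shell's status command; a minimal check, assuming the hbase binary is on the PATH:
echo "status" | hbase shell   # should report the region servers listed in regionservers (master, slave1)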
(7) Build Spark:
mvn -Dhadoop.version=2.6.0 -Phadoop-2.6 -DskipTests clean package
mvn -Dhadoop.version=2.6.0 -Phadoop-2.6 -Dmaven.test.skip=true clean package -Dmaven.compile.fork=true
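If a deployable tarball is wanted rather than just the compiled jars, Spark also ships a make-distribution script (at the repository root in Spark 1.x, under dev/ in Spark 2.x); a sketch assuming the 2.x layout and the same Hadoop profile as above:
./dev/make-distribution.sh --name hadoop2.6 --tgz -Phadoop-2.6 -Dhadoop.version=2.6.0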
(8) Spark configuration (spark/conf):
spark-env.sh:
export JAVA_HOME=/usr/java/jdk1.8.0_101
export HADOOP_HOME=/home/templarzq/hadoop
export HADOOP_CONF_DIR=/home/templarzq/hadoop/etc/hadoop
export SPARK_HOME=/home/templarzq/spark
export SPARK_CLASSPATH="$ES_HOME/lib/*":"$KAFKA_HOME/libs/*":"$SPARK_HOME/jars/*":$SPARK_CLASSPATH
export SPARK_LOCAL_IP=192.168.8.157
export SPARK_EXECUTOR_INSTANCES=1
export SPARK_EXECUTOR_CORES=6
export SPARK_EXECUTOR_MEMORY=4G
export SPARK_UI_PORT=4040
export SPARK_MASTER_IP=192.168.8.157
#export SPARK_MASTER_IP=localhost
export SPARK_MASTER_PORT=7077
export SPARK_MASTER_WEBUI_PORT=8099
export SPARK_MASTER_HOST=192.168.8.157
export SPARK_WORKER_CORES=6
export SPARK_WORKER_INSTANCES=2
export SPARK_WORKER_MEMORY=8g
export SPARK_WORKER_WEBUI_PORT=8081
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$HADOOP_HOME/lib/native
slaves: add the slave IPs, one per line
spark-defaults.conf:
spark.master spark://192.168.8.157:7077
spark.eventLog.enabled true
spark.eventLog.dir /tmp/eventLog
# spark.eventLog.dir hdfs://192.168.8.157:9000/eventLog
# spark.serializer org.apache.spark.serializer.KryoSerializer
spark.driver.memory 512m
spark.executor.extraJavaOptions -XX:+UseG1GC
# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
spark.es.nodes 192.168.8.157
spark.es.port 9200
spark.sql.hive.thriftServer.singleSession true
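With spark.eventLog.enabled set to true, the event log directory must exist before the first application is submitted, otherwise the driver fails at startup. For the local path used above (the commented-out HDFS variant would be created with hdfs dfs -mkdir -p instead):
mkdir -p /tmp/eventLog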
(9) Start Spark:
$SPARK_HOME/sbin/start-all.sh
(or:
bin/spark-sql --master spark://10.8.2.100:7077 --jars /usr/local/spark-1.5.2/lib/elasticsearch-hadoop-2.1.1.jar
spark-shell --master spark://10.8.2.100:7077
)
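To smoke-test the standalone cluster end to end, submitting the bundled SparkPi example is enough; a sketch, noting that the examples jar sits under lib/ in Spark 1.x and under examples/jars/ in Spark 2.x:
$SPARK_HOME/bin/spark-submit --class org.apache.spark.examples.SparkPi \
    --master spark://192.168.8.157:7077 \
    $SPARK_HOME/examples/jars/spark-examples_*.jar 100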
(10) Start the thrift server:
Edit $SPARK_HOME/conf/hive-site.xml (this is the file Spark actually reads for its Hive metastore settings):
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:derby:;databaseName=metastore_db;create=true</value>
    <description>JDBC connect string for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>org.apache.derby.jdbc.EmbeddedDriver</value>
    <description>Driver class name for a JDBC metastore</description>
  </property>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/home/hadoop/hive/hive-warehouse</value>
    <description>Where to store metastore data</description>
  </property>
</configuration>
Restart Spark: $SPARK_HOME/sbin/start-all.sh
Start the thrift server: $SPARK_HOME/sbin/start-thriftserver.sh --master spark://localhost:7077
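Once the thrift server is up it listens on port 10000 by default, and the beeline client shipped with Spark is the quickest way to verify it; a sketch assuming the default port and no authentication:
$SPARK_HOME/bin/beeline -u jdbc:hive2://localhost:10000
(then run e.g. show tables; at the beeline prompt)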