Hadoop Cluster
1. Configure the files under Hadoop's ./etc/hadoop directory
hadoop-env.sh
# The java implementation to use.
export JAVA_HOME=/opt/bigdata/jdk180
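To confirm the JAVA_HOME path is valid before going further (assuming the JDK was unpacked to /opt/bigdata/jdk180 as above), the binary can be run directly:
[root@cjh1 hadoop260]# /opt/bigdata/jdk180/bin/java -version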
core-site.xml
<configuration>
  <!-- NameNode RPC address -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://cjh:9000</value>
  </property>
  <!-- Base directory for Hadoop's temporary and data files -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/bigdata/hadoop260/hadoop2</value>
  </property>
  <!-- Allow the root user to impersonate users from any host and any group -->
  <property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
  </property>
</configuration>
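After the environment variables from step 2 are set, hdfs getconf is a quick way to confirm this file is being picked up; it prints the effective value of a key:
[root@cjh1 hadoop260]# hdfs getconf -confKey fs.defaultFS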
hdfs-site.xml
<configuration>
  <!-- Block replication factor; matches the three DataNodes listed in slaves -->
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <!-- SecondaryNameNode HTTP address -->
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>cjh:50090</value>
  </property>
</configuration>
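Once the cluster is running and a file has been uploaded (step 5 below), the actual replication of that file can be verified with the %r format of hadoop fs -stat, which should print 3 here:
[root@cjh1 ~]# hadoop fs -stat %r /input/wordcount.txt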
mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>cjh:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>cjh:19888</value>
  </property>
</configuration>
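Note that a fresh Hadoop 2.6 tarball usually ships only a template for this file; if mapred-site.xml is missing, create it first:
[root@cjh1 hadoop260]# cp ./etc/hadoop/mapred-site.xml.template ./etc/hadoop/mapred-site.xml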
yarn-site.xml
<configuration>
  <!-- How reducers fetch map output -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <!-- Hostname of the YARN ResourceManager -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>cjh</value>
  </property>
  <!-- Enable log aggregation -->
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <!-- Retain aggregated logs for 7 days (604800 seconds) -->
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
  </property>
</configuration>
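With log aggregation enabled, the container logs of a finished application can be fetched through the yarn CLI; the application ID comes from the job output or the ResourceManager web UI (the ID below is only a placeholder):
[root@cjh1 ~]# yarn logs -applicationId application_1234567890123_0001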
List the worker hostnames in ./slaves, one per line:
vi ./slaves
cjh2
cjh3
cjh4
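Every node needs the same Hadoop directory and profile. If they were only prepared on cjh1, one option is to push them out with scp (this assumes passwordless SSH to the workers has already been configured):
[root@cjh1 ~]# for h in cjh2 cjh3 cjh4; do scp -r /opt/bigdata root@$h:/opt/; scp /etc/profile root@$h:/etc/; done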
2. Configure Hadoop environment variables
vi /etc/profile
export HADOOP_HOME=/opt/bigdata/hadoop260
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
Then remember to source it so the changes take effect:
source /etc/profile
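A quick sanity check that the variables took effect:
[root@cjh1 ~]# echo $HADOOP_HOME
[root@cjh1 ~]# hadoop version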
3. Format HDFS (do this only once, before the first start)
hdfs namenode -format
(the older form "hadoop namenode -format" still works but is deprecated)
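A successful format creates the NameNode metadata directory under hadoop.tmp.dir; the generated VERSION file records the new clusterID (the path below assumes the default dfs/name layout under the hadoop.tmp.dir configured above):
[root@cjh1 hadoop260]# cat /opt/bigdata/hadoop260/hadoop2/dfs/name/current/VERSION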
4. Start Hadoop
[root@cjh1 hadoop260]# start-dfs.sh
[root@cjh1 hadoop260]# start-yarn.sh
Alternatively, start-all.sh runs both scripts in one step (deprecated but still available).
Start the history server:
[root@cjh1 hadoop260]# mr-jobhistory-daemon.sh start historyserver
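If everything came up, jps on the master should show NameNode, SecondaryNameNode, ResourceManager, and JobHistoryServer, while each worker should show DataNode and NodeManager:
[root@cjh1 hadoop260]# jps
[root@cjh1 hadoop260]# ssh cjh2 jps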
5. Test HDFS (hdfs dfs and hadoop fs are interchangeable for these commands)
[root@cjh1 ~]# hdfs dfs -mkdir /input
[root@cjh1 ~]# hadoop fs -mkdir /input2
[root@cjh1 ~]# hdfs dfs -ls /
[root@cjh1 ~]# hadoop fs -ls /
[root@cjh1 ~]# hadoop fs -put ./wordcount.csv /input
[root@cjh1 mapreduce2]# hadoop fs -put /root/wordcount.txt /input
[root@cjh1 mapreduce2]# hadoop fs -rm /input/*.csv
[root@cjh1 mapreduce2]# hadoop jar ./hadoop-mapreduce-examples-2.6.0-cdh5.14.2.jar wordcount /input /outputcsv
[root@cjh1 mapreduce2]# hadoop fs -cat /outputcsv/part-r-00000
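To copy the result out of HDFS, hadoop fs -get works (the local filename below is just an example). The cluster can also be inspected from the web UIs: the NameNode on port 50070, the ResourceManager on 8088, and the JobHistory server on the port 19888 configured above.
[root@cjh1 mapreduce2]# hadoop fs -get /outputcsv/part-r-00000 ./wordcount_result.txt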