0) My environment
macOS Big Sur Version 11.6
JDK 1.8 (jdk1.8.0_321)
1) Download and install
## download hadoop-2.8.0.tar.gz from http://archive.apache.org/dist/
## unpack to ~/work/hadoop
## set environment variables
$ cd ~/work/hadoop
$ export HADOOP_HOME=~/work/hadoop
$ export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
2) configure and start
## modify configuration for pseudo-dist
$ cd ~/work/hadoop/etc/hadoop
$ diff -u hadoop-env.sh.orig hadoop-env.sh
--- hadoop-env.sh.orig 2017-03-17 13:31:33.000000000 +0800
+++ hadoop-env.sh 2022-05-16 21:50:41.000000000 +0800
@@ -22,7 +22,7 @@
# remote nodes.
# The java implementation to use.
-export JAVA_HOME=${JAVA_HOME}
+export JAVA_HOME=/Library/Java/JavaVirtualMachines/jdk1.8.0_321.jdk/Contents/Home
# The jsvc implementation to use. Jsvc is required to run secure datanodes
$ diff -u core-site.xml.orig core-site.xml
--- core-site.xml.orig 2017-03-17 13:31:33.000000000 +0800
+++ core-site.xml 2022-06-07 16:43:09.000000000 +0800
@@ -17,4 +17,21 @@
<!-- Put site-specific property overrides in this file. -->
<configuration>
+ <property>
+ <name>fs.defaultFS</name>
+ <value>hdfs://localhost:9000</value>
+ </property>
+ <property>
+ <name>hadoop.tmp.dir</name>
+ <value>/Users/sun_xo/work/hadoop/data/tmp</value>
+ </property>
+ <!-- OOZIE -->
+ <property>
+ <name>hadoop.proxyuser.sun_xo.hosts</name>
+ <value>*</value>
+ </property>
+ <property>
+ <name>hadoop.proxyuser.sun_xo.groups</name>
+ <value>*</value>
+ </property>
</configuration>
$ diff -u hdfs-site.xml.orig hdfs-site.xml
--- hdfs-site.xml.orig 2017-03-17 13:31:36.000000000 +0800
+++ hdfs-site.xml 2022-05-28 11:22:22.000000000 +0800
@@ -17,5 +17,12 @@
<!-- Put site-specific property overrides in this file. -->
<configuration>
-
+ <property>
+ <name>dfs.replication</name>
+ <value>1</value>
+ </property>
</configuration>
## format hdfs
$ hdfs namenode -format
## start hadoop as pseudo-dist
$ cd ~/work/hadoop
$ mr.sh start
## create home on hdfs
$ hdfs dfs -mkdir -p /user/sun_xo
## modify for pseudo-dist with yarn
$ cd ~/work/hadoop/etc/hadoop
$ diff -u yarn-site.xml.orig yarn-site.xml
--- yarn-site.xml.orig 2017-03-17 13:31:42.000000000 +0800
+++ yarn-site.xml 2022-05-17 09:09:40.000000000 +0800
@@ -13,7 +13,20 @@
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
-
-<!-- Site specific YARN configuration properties -->
-
+ <property>
+ <name>yarn.resourcemanager.hostname</name>
+ <value>localhost</value>
+ </property>
+ <property>
+ <name>yarn.nodemanager.aux-services</name>
+ <value>mapreduce_shuffle</value>
+ </property>
+ <property>
+ <name>yarn.log.server.url</name>
+ <value>http://localhost:19888/jobhistory/logs</value>
+ </property>
+ <property>
+ <name>yarn.log-aggregation-enable</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>yarn.log-aggregation.retain-seconds</name>
+ <value>604800</value>
+ </property>
</configuration>
$ diff -u mapred-site.xml.template mapred-site.xml
--- mapred-site.xml.template 2017-03-17 13:31:46.000000000 +0800
+++ mapred-site.xml 2022-05-16 14:52:39.000000000 +0800
@@ -17,5 +17,8 @@
<!-- Put site-specific property overrides in this file. -->
<configuration>
-
+ <property>
+ <name>mapreduce.framework.name</name>
+ <value>yarn</value>
+ </property>
</configuration>
## start hadoop as pseudo-dist yarn
$ cd ~/work/hadoop
$ mr.sh start yarn
$ jps
26976 NameNode
27378 NodeManager
27427 JobHistoryServer
27061 DataNode
27159 SecondaryNameNode
27295 ResourceManager
3) run a mapreduce case
$ mr.sh pseudo_dist
pseudo_dist
22/06/09 20:47:26 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Deleted input
22/06/09 20:47:28 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
22/06/09 20:47:29 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
22/06/09 20:47:30 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Deleted output
22/06/09 20:47:32 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
22/06/09 20:47:32 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032
22/06/09 20:47:33 INFO input.FileInputFormat: Total input files to process : 1
22/06/09 20:47:33 INFO mapreduce.JobSubmitter: number of splits:1
22/06/09 20:47:33 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1654591879954_0011
22/06/09 20:47:33 INFO impl.YarnClientImpl: Submitted application application_1654591879954_0011
22/06/09 20:47:33 INFO mapreduce.Job: The url to track the job: http://localhost:8088/proxy/application_1654591879954_0011/
22/06/09 20:47:33 INFO mapreduce.Job: Running job: job_1654591879954_0011
22/06/09 20:47:39 INFO mapreduce.Job: Job job_1654591879954_0011 running in uber mode : false
22/06/09 20:47:39 INFO mapreduce.Job: map 0% reduce 0%
22/06/09 20:47:44 INFO mapreduce.Job: map 100% reduce 0%
22/06/09 20:47:48 INFO mapreduce.Job: map 100% reduce 100%
22/06/09 20:47:48 INFO mapreduce.Job: Job job_1654591879954_0011 completed successfully
22/06/09 20:47:49 INFO mapreduce.Job: Counters: 49
File System Counters
FILE: Number of bytes read=104
FILE: Number of bytes written=273663
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=175
HDFS: Number of bytes written=66
HDFS: Number of read operations=6
HDFS: Number of large read operations=0
HDFS: Number of write operations=2
Job Counters
Launched map tasks=1
Launched reduce tasks=1
Rack-local map tasks=1
Total time spent by all maps in occupied slots (ms)=1818
Total time spent by all reduces in occupied slots (ms)=1834
Total time spent by all map tasks (ms)=1818
Total time spent by all reduce tasks (ms)=1834
Total vcore-milliseconds taken by all map tasks=1818
Total vcore-milliseconds taken by all reduce tasks=1834
Total megabyte-milliseconds taken by all map tasks=1861632
Total megabyte-milliseconds taken by all reduce tasks=1878016
Map-Reduce Framework
Map input records=1
Map output records=10
Map output bytes=103
Map output materialized bytes=104
Input split bytes=112
Combine input records=10
Combine output records=8
Reduce input groups=8
Reduce shuffle bytes=104
Reduce input records=8
Reduce output records=8
Spilled Records=16
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=71
CPU time spent (ms)=0
Physical memory (bytes) snapshot=0
Virtual memory (bytes) snapshot=0
Total committed heap usage (bytes)=321912832
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=63
File Output Format Counters
Bytes Written=66
22/06/09 20:47:49 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
apache 1
hadoop 2
http 1
hue 1
mapreduce 1
oozie 1
sqoop 2
test 1
## check from a browser
## hadoop-hdfs - http://localhost:50070/explorer.html#/
## hadoop-mapreduce - http://localhost:8088/cluster
## mr.sh
#!/bin/sh
usage() {
    # Print the command synopsis for this script.
    printf '%s\n' "Usage: $0 standalone"
    printf '%s\n' " $0 start [yarn]"
    printf '%s\n' " $0 stop [yarn]"
    printf '%s\n' " $0 pseudo_dist"
}
standalone() {
    # Run the wordcount example in standalone (local-filesystem) mode.
    # Relinks the pristine *.orig configs so Hadoop runs locally, builds a
    # small sample input, then runs the example jar and prints the result.
    # Requires: HADOOP_HOME and jarfile set by the caller.
    cd "$HADOOP_HOME/etc/hadoop" || return 1   # abort rather than relink in the wrong dir
    ln -sf core-site.xml.orig core-site.xml
    ln -sf hdfs-site.xml.orig hdfs-site.xml
    ln -sf yarn-site.xml.orig yarn-site.xml
    rm -f mapred-site.xml
    cd "$HADOOP_HOME" || return 1              # guard the rm -rf below
    mkdir -p input
    echo test apache hadoop hadoop sqoop hue mapreduce sqoop oozie http > input/in.txt
    rm -rf output                              # wordcount refuses to overwrite output
    hadoop jar "$jarfile" wordcount input output
    cat output/*
}
start() {
    # Start HDFS in pseudo-distributed mode; with any argument (e.g. "yarn"),
    # also switch in the YARN/MapReduce configs and start YARN + the job
    # history server. Daemons already running (per jps) are left alone.
    # Requires: HADOOP_HOME set; *.pseudo config files present.
    cd "$HADOOP_HOME/etc/hadoop" || return 1
    ln -sf core-site.xml.pseudo core-site.xml
    ln -sf hdfs-site.xml.pseudo hdfs-site.xml
    if [ $# -eq 0 ]; then
        # HDFS-only: keep the pristine yarn config and no mapred config.
        ln -sf yarn-site.xml.orig yarn-site.xml
        rm -f mapred-site.xml
    else
        ln -sf yarn-site.xml.pseudo yarn-site.xml
        ln -sf mapred-site.xml.pseudo mapred-site.xml
    fi
    cd "$HADOOP_HOME" || return 1
    # hdfs namenode -format
    if [ "x$(jps | grep -w "NameNode")" = "x" ]; then
        start-dfs.sh
    fi
    # BUG FIX: the dfsadmin subcommand is "-safemode" ("-safeoper" is not a
    # valid option, so the original line never left safe mode).
    hdfs dfsadmin -safemode leave
    if [ $# -eq 1 ] && [ "x$(jps | grep -w "NodeManager")" = "x" ]; then
        start-yarn.sh
        mr-jobhistory-daemon.sh start historyserver
    fi
}
stop() {
    # Tear down daemons in reverse order of startup. The YARN pieces are
    # stopped only when an argument was supplied (mirrors start()).
    case $# in
        1)
            mr-jobhistory-daemon.sh stop historyserver
            stop-yarn.sh
            ;;
    esac
    stop-dfs.sh
}
pseudo_dist() {
    # Run the wordcount example against HDFS in pseudo-distributed mode.
    # Builds a local sample file, recreates the HDFS input/output dirs,
    # runs the example jar on YARN/HDFS, and prints the result from HDFS.
    # Requires: HADOOP_HOME and jarfile set; daemons already started.
    cd "$HADOOP_HOME" || return 1
    mkdir -p input
    echo test apache hadoop hadoop sqoop hue mapreduce sqoop oozie http > input/in.txt
    # Recreate the HDFS input directory from the local sample file.
    hdfs dfs -rm -f -r input
    hdfs dfs -mkdir input
    hdfs dfs -put input/in.txt input
    hdfs dfs -rm -f -r output       # wordcount refuses to overwrite output
    hadoop jar "$jarfile" wordcount input output
    hdfs dfs -cat output/*
}
oper=$1
yarn=$2
jarfile=$HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.8.0.jar

# Validate arguments and dispatch to the matching function.
# A POSIX 'case' replaces the deprecated/ambiguous test -a/-o operators
# (and the "x$var" prefix hack) used previously.
case $oper in
    standalone|start|stop|pseudo_dist)
        case $yarn in
            ""|yarn)
                echo "$oper $yarn"
                # Intentionally unquoted: an empty $yarn must pass NO
                # argument, so the functions see $# = 0.
                $oper $yarn
                ;;
            *)
                usage
                exit 1
                ;;
        esac
        ;;
    *)
        usage
        exit 1
        ;;
esac
reference: https://hadoop.apache.org/docs/r2.8.0/