下面是hadoop发布版本, bin目录下面的hadoop命令的源码,hadoop命令支持好多种参数,一直记不住,想通过精度这部分代码,能记住部分参数.
#!/usr/bin/env bash # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # This script runs the hadoop core commands. #这3行命令的主要目的是,获取Hadoop运行所在目录. bin=`which $0` bin=`dirname ${bin}` bin=`cd "$bin"; pwd` #定位找到 hadoop-config.sh 文件,里面包含了很多Hadoop命令的配置文件. #先找HADOOP_LIBEXEC_DIR目录,如果没有定义,就使用默认的路径,也就是hadoop根目录下面的libexec DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} . $HADOOP_LIBEXEC_DIR/hadoop-config.sh function print_usage(){ echo "Usage: hadoop [--config confdir] COMMAND" echo " where COMMAND is one of:" echo " fs run a generic filesystem user client" echo " version print the version" echo " jar <jar> run a jar file" echo " checknative [-a|-h] check native hadoop and compression libraries availability" echo " distcp <srcurl> <desturl> copy file or directories recursively" echo " archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive" echo " classpath prints the class path needed to get the" echo " Hadoop jar and the required libraries" echo " daemonlog get/set the log level for each daemon" echo " or" echo " CLASSNAME run the class named CLASSNAME" echo "" echo "Most commands print help when invoked w/o parameters." } #如果命令参数个数为0,则打印提示,退出 if [ $# = 0 ]; then print_usage exit fi #解析第1个参数,第0个参数是命令本身 COMMAND=$1 case $COMMAND in # usage flags --help|-help|-h) print_usage exit ;; #hdfs commands namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer|fetchdt|oiv|dfsgroups) echo "DEPRECATED: Use of this script to execute hdfs command is deprecated." 1>&2 echo "Instead use the hdfs command for it." 1>&2 echo "" 1>&2 #try to locate hdfs and if present, delegate to it. shift if [ -f "${HADOOP_HDFS_HOME}"/bin/hdfs ]; then exec "${HADOOP_HDFS_HOME}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@" elif [ -f "${HADOOP_PREFIX}"/bin/hdfs ]; then exec "${HADOOP_PREFIX}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@" else echo "HADOOP_HDFS_HOME not found!" exit 1 fi ;; #mapred commands for backwards compatibility pipes|job|queue|mrgroups|mradmin|jobtracker|tasktracker|mrhaadmin|mrzkfc|jobtrackerha) echo "DEPRECATED: Use of this script to execute mapred command is deprecated." 1>&2 echo "Instead use the mapred command for it." 1>&2 echo "" 1>&2 #try to locate mapred and if present, delegate to it. shift if [ -f "${HADOOP_MAPRED_HOME}"/bin/mapred ]; then exec "${HADOOP_MAPRED_HOME}"/bin/mapred ${COMMAND/mrgroups/groups} "$@" elif [ -f "${HADOOP_PREFIX}"/bin/mapred ]; then exec "${HADOOP_PREFIX}"/bin/mapred ${COMMAND/mrgroups/groups} "$@" else echo "HADOOP_MAPRED_HOME not found!" exit 1 fi ;; #打印出Hadoop执行时的classpath,方便查找classpath的错误 classpath) if $cygwin; then CLASSPATH=`cygpath -p -w "$CLASSPATH"` fi echo $CLASSPATH exit ;; #core commands *) # the core commands if [ "$COMMAND" = "fs" ] ; then CLASS=org.apache.hadoop.fs.FsShell elif [ "$COMMAND" = "version" ] ; then CLASS=org.apache.hadoop.util.VersionInfo elif [ "$COMMAND" = "jar" ] ; then CLASS=org.apache.hadoop.util.RunJar elif [ "$COMMAND" = "checknative" ] ; then CLASS=org.apache.hadoop.util.NativeLibraryChecker elif [ "$COMMAND" = "distcp" ] ; then CLASS=org.apache.hadoop.tools.DistCp CLASSPATH=${CLASSPATH}:${TOOL_PATH} elif [ "$COMMAND" = "daemonlog" ] ; then CLASS=org.apache.hadoop.log.LogLevel elif [ "$COMMAND" = "archive" ] ; then CLASS=org.apache.hadoop.tools.HadoopArchives CLASSPATH=${CLASSPATH}:${TOOL_PATH} elif [[ "$COMMAND" = -* ]] ; then # class and package names cannot begin with a - echo "Error: No command named \`$COMMAND' was found. Perhaps you meant \`hadoop ${COMMAND#-}'" exit 1 else #如果上面的都没匹配上,那么第一个参数作为classname 来解析,比如下面就是一个示例 #hadoop org.apache.hadoop.examples.WordCount /tmp/15 /tmp/46 CLASS=$COMMAND fi #删除$@中的第一个参数,比如"hadoop org.apache.hadoop.examples.WordCount /tmp/15 /tmp/46" #在运行shift之前$@=org.apache.hadoop.examples.WordCount /tmp/15 /tmp/46 #之后$@=/tmp/15 /tmp/46 shift # Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS # 对应的这两个变量默认的定义在文件hadoop-config.sh,如果要修改启动参数,也可以修改这个文件,比如想开启远程debug HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" #make sure security appender is turned off HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}" #兼容cygwin模拟器 if $cygwin; then CLASSPATH=`cygpath -p -w "$CLASSPATH"` fi #没什么意思,放在这是为了方便修改扩展CLASSPATH export CLASSPATH=$CLASSPATH exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@" ;; esac