一、ssh免密
指令 用处 ssh-keygen -t rsa -P "" 生成RSA密钥对(私钥id_rsa与公钥id_rsa.pub) cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys 将公钥写入授权文件 cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys 将公钥追加到授权文件 chmod 600 ~/.ssh/authorized_keys 修改权限(authorized_keys须为600,~/.ssh目录须为700)
二、java
1. 解压
tar -zxvf /opt/software/jdk-8u11-linux-x64.tar.gz -C /usr/local/src
2. 在环境变量/etc/profile中添加
export JAVA_HOME=/usr/local/src/java
export PATH=$PATH:$JAVA_HOME/bin
三、zookeeper
1. 解压
tar -zxvf /opt/software/zookeeper-3.4.8.tar.gz -C /usr/local/src/
2. 在环境变量/etc/profile中添加
export ZK_HOME=/usr/local/src/zookeeper
export PATH=$PATH:$ZK_HOME/bin
3. 将 zoo_sample.cfg复制为zoo.cfg,添加并更改如下配置:
dataDir=/usr/local/src/zookeeper/data
dataLogDir=/usr/local/src/zookeeper/logs
server.1=master:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888
4. 切换到zookeeper的data文件夹下创建myid
echo "1" > myid
四、高可用hadoop
文件名称 用途 hadoop-env.sh 由于Hadoop是java进程,所以需要添加jdk core-site.xml 指定namenode的位置,配置全局参数 hdfs-site.xml 配置namenode和datanode存放文件的具体路径,配置HDFS参数 mapred-site.xml 决定mapreduce作业是提交到 YARN集群还是使用本地作业执行器本地执行 yarn-site.xml 集群资源管理系统参数 slaves 写入的ip对应的机器执行DataNode 和 NodeManager
1. 解压
tar -zxvf /opt/software/hadoop-2.7.1.tar.gz -C /usr/local/src/
2. 在环境变量/etc/profile中添加
export HADOOP_HOME=/usr/local/src/hadoop
export HADOOP_PREFIX=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_OPTS="-Djava.library.path=$HADOOP_INSTALL/lib:$HADOOP_COMMON_LIB_NATIVE_DIR"
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
3. 配置文件
- hadoop-env.sh
export JAVA_HOME=/usr/local/src/java
- core-site.xml —— 配置全局参数
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://mycluster</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>file:/usr/local/src/hadoop/tmp</value>
  </property>
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>master:2181,slave1:2181,slave2:2181</value>
  </property>
  <property>
    <name>ha.zookeeper.session-timeout.ms</name>
    <value>30000</value>
    <description>ms</description>
  </property>
  <property>
    <name>fs.trash.interval</name>
    <value>1440</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hadoop.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hadoop.groups</name>
    <value>*</value>
  </property>
</configuration>
- hdfs-site.xml —— HDFS参数
<configuration>
  <property>
    <name>dfs.qjournal.start-segment.timeout.ms</name>
    <value>60000</value>
  </property>
  <property>
    <name>dfs.nameservices</name>
    <value>mycluster</value>
  </property>
  <property>
    <name>dfs.ha.namenodes.mycluster</name>
    <value>master,slave1</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.mycluster.master</name>
    <value>master:8020</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.mycluster.slave1</name>
    <value>slave1:8020</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.mycluster.master</name>
    <value>master:50070</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.mycluster.slave1</name>
    <value>slave1:50070</value>
  </property>
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://master:8485;slave1:8485;slave2:8485/mycluster</value>
  </property>
  <property>
    <name>dfs.client.failover.proxy.provider.mycluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>
      sshfence
      shell(/bin/true)
    </value>
  </property>
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.support.append</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/root/.ssh/id_rsa</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/usr/local/src/hadoop/tmp/hdfs/nn</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/usr/local/src/hadoop/tmp/hdfs/dn</value>
  </property>
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/usr/local/src/hadoop/tmp/hdfs/jn</value>
  </property>
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.connect-timeout</name>
    <value>30000</value>
  </property>
  <property>
    <name>ha.failover-controller.cli-check.rpc-timeout.ms</name>
    <value>60000</value>
  </property>
</configuration>
- mapred-site.xml —— MapReduce参数
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>master:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>master:19888</value>
  </property>
</configuration>
- yarn-site.xml —— 集群资源管理系统参数
<configuration>
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>yrc</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>master</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>slave1</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>master:2181,slave1:2181,slave2:2181</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>86400</value>
  </property>
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
  <property>
    <name>yarn.nodemanager.pmem-check-enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
    <description>Whether virtual memory limits will be enforced for containers</description>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-pmem-ratio</name>
    <value>4</value>
    <description>Ratio between virtual memory to physical memory when setting memory limits for containers</description>
  </property>
</configuration>
- slaves
master
slave1
slave2
五、hbase
文件名称 用途 hbase-env.sh 配置环境变量 hbase-site.xml hbase主要的配置文件 regionservers 一个文本文件,包含所有需要运行regionserver服务的注解
1. 解压
tar -zxvf /opt/software/hbase-1.2.1-bin.tar.gz -C /usr/local/src/
2. 在环境变量/etc/profile中添加
export HBASE_HOME=/usr/local/src/hbase
export PATH=$PATH:$HBASE_HOME/bin
3. 配置文件
- hbase-env.sh
export JAVA_HOME=/usr/local/src/java
export HADOOP_HOME=/usr/local/src/hadoop
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HBASE_MANAGES_ZK=false
export HBASE_LOG_DIR=${HBASE_HOME}/logs
export HBASE_PID_DIR=${HBASE_HOME}/pid
- hbase-site.xml
<configuration>
  <property>
    <name>hbase.rootdir</name>
    <!-- HA集群应使用nameservice(mycluster)而非单一NameNode地址,否则主备切换后HBase无法访问HDFS -->
    <value>hdfs://mycluster/hbase</value>
  </property>
  <property>
    <name>hbase.master.info.port</name>
    <value>16010</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.clientPort</name>
    <value>2181</value>
  </property>
  <property>
    <name>hbase.tmp.dir</name>
    <value>/usr/local/src/hbase/tmp</value>
  </property>
  <property>
    <name>zookeeper.session.timeout</name>
    <value>120000</value>
  </property>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>master,slave1,slave2</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/usr/local/src/hbase/tmp/zookeeper-hbase</value>
  </property>
</configuration>
- regionservers
slave1
slave2
4. hadoop中的配置文件复制到hbase的conf中
cp /usr/local/src/hadoop/etc/hadoop/core-site.xml /usr/local/src/hbase/conf/
cp /usr/local/src/hadoop/etc/hadoop/hdfs-site.xml /usr/local/src/hbase/conf/
六、mysql
1. 配置本地yum源
mkdir /mnt/cdrom
mount -t iso9660 /dev/cdrom /mnt/cdrom
mv /etc/yum.repos.d /etc/yum.repos.d.bak
mkdir /etc/yum.repos.d
vi /etc/yum.repos.d/CentOS-local.repo
[base-local]
name=centos
baseurl=file:///mnt/cdrom
enabled=1
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7
yum clean all
yum makecache
2. 卸载mariadb并安装mysql
rpm -qa | grep mariadb
rpm -e --nodeps mariadb-libs-5.5.56-2.el7.x86_64
rpm -ivh mysql-community-common-5.7.18-1.el7.x86_64.rpm
rpm -ivh mysql-community-libs-5.7.18-1.el7.x86_64.rpm
rpm -ivh mysql-community-client-5.7.18-1.el7.x86_64.rpm
yum install net-tools
yum install perl
rpm -ivh mysql-community-server-5.7.18-1.el7.x86_64.rpm
3. 在/etc/my.cnf中添加
default-storage-engine=innodb
innodb_file_per_table
collation-server=utf8_general_ci
init-connect='SET NAMES utf8'
character-set-server=utf8
4. 启动mysql并修改密码
systemctl start mysqld —— 启动mysql
systemctl status mysqld —— 查看mysql状态
cat /var/log/mysqld.log | grep password —— 得到 MySQL 初始密码
mysql_secure_installation —— 重新设定密码和配置
mysql -uroot -pPassword123$
七、hive
1. 解压
文件名称 用途 hive-env.sh 配置环境变量 hive-site.xml hive主要的配置文件
tar -zxvf apache-hive-2.0.0-bin.tar.gz -C /usr/local/src/
2. 在环境变量/etc/profile中添加
export HIVE_HOME=/usr/local/src/hive
export PATH=$HIVE_HOME/bin:$PATH
export HIVE_CONF_DIR=$HIVE_HOME/conf
3. 配置文件
- hive-env.sh
export JAVA_HOME=/usr/local/src/java
export HADOOP_HOME=/usr/local/src/hadoop
export HIVE_CONF_DIR=/usr/local/src/hive/conf
export HIVE_AUX_JARS_PATH=/usr/local/src/hive/lib
- hive-site.xml
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://master:3306/hive_db?createDatabaseIfNotExist=true</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>Password123$</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <property>
    <name>hive.downloaded.resources.dir</name>
    <!-- ${hive.session.id}_resources为Hive官方默认用于该项的目录后缀 -->
    <value>/usr/local/src/hive/tmp/${hive.session.id}_resources</value>
  </property>
  <property>
    <name>hive.exec.local.scratchdir</name>
    <value>/usr/local/src/hive/tmp</value>
  </property>
  <property>
    <name>hive.querylog.location</name>
    <value>/usr/local/src/hive/tmp</value>
  </property>
  <property>
    <name>hive.server2.logging.operation.log.location</name>
    <value>/usr/local/src/hive/tmp/operation_logs</value>
  </property>
  <property>
    <name>hive.server2.webui.host</name>
    <value>master</value>
  </property>
  <property>
    <name>hive.server2.webui.port</name>
    <value>10002</value>
  </property>
</configuration>
4. 将mysql驱动复制到hive中
cp /opt/software/mysql-connector-java-5.1.46.jar /usr/local/src/hive/lib/
5. 初始化mysql
schematool -initSchema -dbType mysql
八、sqoop
1. 解压
tar -zxvf /opt/software/sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz -C /usr/local/src
2. 在环境变量/etc/profile中添加
export SQOOP_HOME=/usr/local/src/sqoop
export PATH=$PATH:$SQOOP_HOME/bin
export CLASSPATH=$CLASSPATH:$SQOOP_HOME/lib
3. 配置文件
- sqoop-env.sh
export HADOOP_COMMON_HOME=/usr/local/src/hadoop
export HADOOP_MAPRED_HOME=/usr/local/src/hadoop
export HBASE_HOME=/usr/local/src/hbase
export HIVE_HOME=/usr/local/src/hive
4. 将mysql驱动复制到sqoop中
cp /opt/software/mysql-connector-java-5.1.46.jar /usr/local/src/sqoop/lib/
九、flume
1. 解压
文件名称 用途 flume-env.sh 配置环境变量 hdfs_sink.conf Flume配置文件
tar zxvf /opt/software/apache-flume-1.6.0-bin.tar.gz -C /usr/local/src
2. 在环境变量/etc/profile中添加
export FLUME_HOME=/usr/local/src/flume
export PATH=$PATH:$FLUME_HOME/bin
3. 配置文件
- flume-env.sh
export JAVA_HOME= /usr/local/src/jdk1.8.0_11
- /usr/local/src/flume/conf/hdfs_sink.conf
a1.sources = r1
a1.sinks = k1
a1.channels = c1
a1.sources.r1.type = syslogtcp
a1.sources.r1.port = 5140
a1.sources.r1.host = localhost
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://master:8020/user/flume/syslogtcp
a1.sinks.k1.hdfs.filePrefix = Syslog
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 10
a1.sinks.k1.hdfs.useLocalTimeStamp= true
a1.sinks.k1.hdfs.roundUnit = minute
a1.channels.c1.type = memory
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
4. 安装组件
yum install telnet
yum install telnet-server
yum install xinetd
在/etc/xinetd.d下新建telnet配置文件
service telnet
{
flags = REUSE
socket_type = stream
wait = no
user = root
server = /usr/sbin/in.telnetd
log_on_failure += USERID
disable = no
}
5. 启动Telnet服务
systemctl start xinetd
systemctl enable xinetd
十、scala
1. 解压
tar -zxvf /opt/software/scala-2.11.8.tgz -C /usr/local/src
2. 在环境变量/etc/profile中添加
export SCALA_HOME=/usr/local/src/scala
export PATH=$PATH:$SCALA_HOME/bin
十一、spark
1. 解压
文件名称 用途 spark-env.sh 配置环境变量 slaves 集群节点ip
tar -zxvf /opt/software/spark-2.0.0-bin-hadoop2.6.tgz -C /usr/local/src/
2. 在环境变量/etc/profile中添加
export SPARK_HOME=/usr/local/src/spark
export PATH=$PATH:$SPARK_HOME/bin
3. 配置文件
- spark-env.sh
export JAVA_HOME=/usr/local/src/java
export HADOOP_HOME=/usr/local/src/hadoop
export SCALA_HOME=/usr/local/src/scala
export SPARK_MASTER_IP=master
export SPARK_MASTER_PORT=7077
export SPARK_DIST_CLASSPATH=$(/usr/local/src/hadoop/bin/hadoop classpath)
export HADOOP_CONF_DIR=/usr/local/src/hadoop/etc/hadoop
export SPARK_YARN_USER_ENV="CLASSPATH=/usr/local/src/hadoop/etc/hadoop"
export YARN_CONF_DIR=/usr/local/src/hadoop/etc/hadoop
- slaves
master
slave1
slave2