注意:zookeeper、hadoop、kafka、hive、flink、spark、kafkamanager、clickhouse、sqoop、hbase、flume、redis集群安装手册,其中spark、flink、hive(mapreduce)采用的是提交至yarn资源管理器执行任务方式,所以采用单节点配置
自用版 大家可以用来当参考,本人工作之余维护个人开发项目所整理的集群搭建手册
## 初始化配置
1. 创建网络 达到标准 ping www.baidu.com 即可
vi /etc/sysconfig/network-scripts/ifcfg-ens33
添加或修改:
BOOTPROTO=static
ONBOOT=yes
IPADDR=192.168.136.151
NETMASK=255.255.255.0
GATEWAY=192.168.136.2
DNS1=114.114.114.114
预期效果:
[root@slave2 config]# ping www.baidu.com
PING www.a.shifen.com (36.152.44.95) 56(84) bytes of data.
64 bytes from 36.152.44.95 (36.152.44.95): icmp_seq=1 ttl=128 time=10.2 ms
64 bytes from 36.152.44.95 (36.152.44.95): icmp_seq=2 ttl=128 time=10.2 ms
64 bytes from 36.152.44.95 (36.152.44.95): icmp_seq=3 ttl=128 time=11.8 ms
2. 绑定命名变量和ip
vi /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.136.131 master
192.168.136.132 slave1
192.168.136.133 slave2
3.reboot 重启
4.关闭防火墙
systemctl disable firewalld.service
systemctl stop firewalld.service
systemctl status firewalld.service
5.ssh免密通信
ssh-keygen -t rsa
ssh-copy-id master
ssh-copy-id slave1
ssh-copy-id slave2
ssh-copy-id node1   # 注:node1~node3 未在上方 /etc/hosts 中配置,仅当集群包含这些额外节点时才执行,否则可跳过
ssh-copy-id node2
ssh-copy-id node3
相互 ssh 节点名称 互通不用密码验证即表示没问题
6.jdk安装
yum -y install tar
yum -y install vim
tar -zxvf jdk-8u191-linux-x64.tar.gz -C /opt/apps/
vim /etc/profile
添加:
export JAVA_HOME=/opt/apps/jdk1.8.0_191/
export PATH=$PATH:$JAVA_HOME/bin
source /etc/profile
java -version
另外两个节点安装
scp -r /opt/apps slave2:/opt/
scp -r /opt/apps slave1:/opt/
scp -r /etc/profile slave1:/etc/profile
scp -r /etc/profile slave2:/etc/profile
java -version
7.时间同步
yum -y install ntp
crontab -e
添加:
*/10 * * * * /usr/sbin/ntpdate -u pool.ntp.org
安装Zookeeper
tar -zxf /opt/soft/zookeeper-3.4.6.tar.gz -C /opt/apps/
cp conf/zoo_sample.cfg conf/zoo.cfg
vim conf/zoo.cfg
添加:
dataDir=/opt/data/zookeeper
dataLogDir=/opt/data/zookeeper/log
server.1=master:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888
scp -r zookeeper-3.4.6 slave1:/opt/apps/
scp -r zookeeper-3.4.6 slave2:/opt/apps/
mkdir -p /opt/data/zookeeper
echo 1 >> /opt/data/zookeeper/myid 节点1 master
echo 2 >> /opt/data/zookeeper/myid 节点2 slave1
echo 3 >> /opt/data/zookeeper/myid 节点3 slave2
echo "export ZOOKEEPER_HOME=/opt/apps/zookeeper-3.4.6" >> /etc/profile
echo 'export PATH=$PATH:$ZOOKEEPER_HOME/bin' >> /etc/profile
source /etc/profile
mkdir /opt/data/zookeeper/log
启动:
zkServer.sh start
状态:
zkServer.sh status
关闭:
zkServer.sh stop
群起脚本
#!/bin/bash
# ZooKeeper 集群群起/群停脚本
# 用法: ./zk.sh start|stop —— 对 master/slave1/slave2 三个节点批量执行 zkServer.sh
hosts=(master slave1 slave2)
zk_bin=/opt/apps/zookeeper-3.4.6/bin/zkServer.sh

if [ $# -eq 0 ]; then
  echo "please input param: start stop"
else
  for host in "${hosts[@]}"; do
    echo "${host}"
    ssh "${host}" "source /etc/profile;${zk_bin} ${1}"
  done
  # 启动后等待选举完成,再逐台检查状态
  if [ "$1" = "start" ]; then
    sleep 3
    for host in "${hosts[@]}"; do
      echo "checking ${host}"
      # 原脚本此处误把数字 1..3 当作 ssh 目标主机,已修正为真实主机名
      ssh "${host}" "source /etc/profile;${zk_bin} status"
    done
  fi
fi
安装Hadoop
tar -zxf hadoop-3.1.1.tar.gz -C /opt/apps/
echo "export HADOOP_HOME=/opt/apps/hadoop-3.1.1" >> /etc/profile
echo 'export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin' >> /etc/profile
source /etc/profile
vim core-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:8020/</value>
</property>
<property>
<name>hadoop.http.staticuser.user</name>
<value>root</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.users</name>
<value>hdfs,impala,hive,hbase,yarn</value>
</property>
<property>
<name>fs.trash.interval</name>
<value>1440</value>
</property>
</configuration>
echo "export JAVA_HOME=$JAVA_HOME" >> hadoop-env.sh
vim hdfs-site.xml
<configuration>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/data/hdpdata/data</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/data/hdpdata/name</value>
</property>
<property>
<name>dfs.namenode.checkpoint.dir</name>
<value>/opt/data/hdpdata/secname/</value>
</property>
<property>
<name>dfs.namenode.rpc-address</name>
<value>master:8020</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>slave1:50090</value>
</property>
</configuration>
vim mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=/opt/apps/hadoop-3.1.1</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=/opt/apps/hadoop-3.1.1</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=/opt/apps/hadoop-3.1.1</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
</configuration>
vim yarn-site.xml
<configuration>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>6144</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>2</value>
</property>
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>true</value>
</property>
<!-- 注:yarn.nodemanager.vmem-check-enabled 已在上方配置为 false,此处的重复 property 已移除 -->
<property>
<description>Whether to enable log aggregation</description>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://master:19888/jobhistory/logs</value>
</property>
<property>
<name>yarn.resourcemanager.am.max-attempts</name>
<value>4</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>128</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>4096</value>
</property>
<property>
<name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
<value>0.6</value>
</property>
<property>
<name>yarn.app.mapreduce.am.resource.mb</name>
<value>1024</value>
</property>
<property>
<name>mapreduce.map.memory.mb</name>
<value>1024</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>1024</value>
</property>
<property>
<name>yarn.nodemanager.resource.detect-hardware-capabilities </name>
<value>true</value>
</property>
<!-- 配置日志过期时间,单位秒 -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>86400</value>
</property>
</configuration>
echo "master" >> workers
echo "slave1" >> workers
echo "slave2" >> workers
sed -i '1iHDFS_DATANODE_USER=root' /opt/apps/hadoop-3.1.1/sbin/start-dfs.sh
sed -i '1iHDFS_DATANODE_SECURE_USER=hdfs' /opt/apps/hadoop-3.1.1/sbin/start-dfs.sh
sed -i '1iHDFS_NAMENODE_USER=root' /opt/apps/hadoop-3.1.1/sbin/start-dfs.sh
sed -i '1iHDFS_SECONDARYNAMENODE_USER=root' /opt/apps/hadoop-3.1.1/sbin/start-dfs.sh
sed -i '1iYARN_RESOURCEMANAGER_USER=root' /opt/apps/hadoop-3.1.1/sbin/start-yarn.sh
sed -i '1iHADOOP_SECURE_DN_USER=yarn' /opt/apps/hadoop-3.1.1/sbin/start-yarn.sh
sed -i '1iYARN_NODEMANAGER_USER=root' /opt/apps/hadoop-3.1.1/sbin/start-yarn.sh
sed -i '1iYARN_RESOURCEMANAGER_USER=root' /opt/apps/hadoop-3.1.1/sbin/stop-yarn.sh
sed -i '1iHADOOP_SECURE_DN_USER=yarn' /opt/apps/hadoop-3.1.1/sbin/stop-yarn.sh
sed -i '1iYARN_NODEMANAGER_USER=root' /opt/apps/hadoop-3.1.1/sbin/stop-yarn.sh
vim /opt/apps/hadoop-3.1.1/sbin/start-dfs.sh
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
vim /opt/apps/hadoop-3.1.1/sbin/start-yarn.sh
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
vim /opt/apps/hadoop-3.1.1/sbin/stop-yarn.sh
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
scp -r /opt/apps/hadoop-3.1.1 slave1:/opt/apps/
scp -r /opt/apps/hadoop-3.1.1 slave2:/opt/apps/
master主节点执行# hdfs namenode -format
master主节点执行# start-dfs.sh
开启历史日志服务
echo "mr-jobhistory-daemon.sh start historyserver" >> /opt/apps/hadoop-3.1.1/sbin/start-all.sh   # 注:start-all.sh 位于 sbin 目录,原文 etc/hadoop 路径有误
安装KAFKA
tar -zxf /opt/soft/kafka_2.11-2.0.0.tgz -C /opt/apps/
mv kafka_2.11-2.0.0 kafka_2.0.0
vim kafka_2.0.0/config/server.properties
添加或修改
broker.id=1 # 我设置的是master:1 slave1:2 slave2:3
host.name=192.168.136.151 #当前主机ip
log.retention.hours=24
zookeeper.connect=master:2181,slave1:2181,slave2:2181
scp -r /opt/apps/kafka_2.0.0 slave1:/opt/apps/
scp -r /opt/apps/kafka_2.0.0 slave2:/opt/apps/
echo "export KAFKA_HOME=/opt/apps/kafka_2.0.0" >> /etc/profile
echo 'export PATH=$PATH:$KAFKA_HOME/bin' >> /etc/profile
source /etc/profile
群起脚本 kakall.sh
#!/bin/bash
# Kafka 集群群起/群停脚本 kakall.sh
# 用法: ./kakall.sh start|stop —— 对 master/slave1/slave2 三个 broker 批量执行
hosts=(master slave1 slave2)
kafka_home=/opt/apps/kafka_2.0.0

if [ $# -eq 0 ]; then
  echo "please input param: start stop"
elif [ "$1" = "start" ]; then
  for host in "${hosts[@]}"; do
    echo "${host}"
    # -daemon 方式后台启动 broker
    ssh "${host}" "source /etc/profile;${kafka_home}/bin/kafka-server-start.sh -daemon ${kafka_home}/config/server.properties"
  done
elif [ "$1" = "stop" ]; then
  for host in "${hosts[@]}"; do
    ssh "${host}" "source /etc/profile;${kafka_home}/bin/kafka-server-stop.sh"
  done
else
  # 原脚本对非法参数静默退出,此处给出提示便于排查
  echo "unknown param: $1 (expect start|stop)"
fi
安装Hive
[slave1@root]> yum -y install wget
[slave1@root]> wget https://dev.mysql.com/get/mysql57-community-release-el7-11.noarch.rpm
[slave1@root]> yum -y localinstall mysql57-community-release-el7-11.noarch.rpm
[slave1@root]> yum -y install mysql-community-server #耗时
设置服务自启
[slave1@root]> systemctl start mysqld
[slave1@root]> systemctl enable mysqld
[slave1@root]> grep 'password' /var/log/mysqld.log
[slave1@root]> mysql -uroot -p
mysql > set global validate_password_policy=LOW;
mysql > set global validate_password_length=6;
mysql > SET PASSWORD = PASSWORD('123456');
mysql > grant all privileges on *.* to 'root'@'%' identified by '123456' with grant
option;
mysql > flush privileges;
mysql > quit;
tar -zxf apache-hive-3.1.2-bin.tar.gz -C /opt/apps/
mv /opt/apps/apache-hive-3.1.2-bin /opt/apps/hive-3.1.2   # 重命名解压目录,使其与下方 HIVE_HOME=/opt/apps/hive-3.1.2 一致
touch hive-site.xml
echo "export HIVE_HOME=/opt/apps/hive-3.1.2" >> /etc/profile
echo 'export PATH=$PATH:$HIVE_HOME/bin' >> /etc/profile
source /etc/profile
vim hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://slave1:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false&amp;serverTimezone=Asia/Shanghai&amp;useUnicode=true&amp;characterEncoding=utf8
</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
</property>
<property>
<name>hive.server2.support.dynamic.service.discovery</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
</property>
<property>
<name>hive.exec.scratchdir</name>
<value>/user/hive/tmp</value>
</property>
<property>
<name>hive.querylog.location</name>
<value>/user/hive/log</value>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
</property>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>hive.server2.webui.host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>hive.server2.webui.port</name>
<value>10002</value>
</property>
<property>
<name>hive.server2.long.polling.timeout</name>
<value>5000</value>
</property>
<property>
<name>hive.server2.enable.doAs</name>
<value>true</value>
</property>
<property>
<name>datanucleus.autoCreateSchema</name>
<value>true</value>
</property>
<property>
<name>datanucleus.autoCreateTables</name>
<value>true</value>
</property>
<property>
<name>datanucleus.fixedDatastore</name>
<value>true</value>
</property>
<property>
<name>hive.execution.engine</name>
<value>mr</value>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://master:9083</value>
</property>
<property>
<name>hive.server2.thrift.client.user</name>
<value>root</value>
</property>
<property>
<name>hive.server2.thrift.client.password</name>
<value>root</value>
</property>
<property>
<name>hive.zookeeper.quorum</name>
<value>master,slave1,slave2</value>
</property>
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
<property>
<name>hive.exec.compress.output</name>
<value>false</value>
</property>
</configuration>
执行:
# 在hive的lib目录中,放入mysql驱动连接jar包
schematool -initSchema -dbType mysql
hive --service metastore
安装Flink
vim /opt/apps/flink-1.13.2/conf/flink-conf.yaml   # Flink 的配置文件名为 flink-conf.yaml
jobmanager.memory.task.off-heap.size: 128m
jobmanager.memory.jvm-metaspace.size: 128m
taskmanager.memory.jvm-metaspace.size: 128m
taskmanager.memory.framework.heap.size: 64m
taskmanager.memory.framework.off-heap.size: 64m
state.checkpoints.num-retained: 10
jobmanager.execution.failover-strategy: region
jobmanager.heap.size: 256m
jobmanager.memory.flink.size: 512m
taskmanager.memory.flink.size: 512m
taskmanager.numberOfTaskSlots: 1
安装Spark
安装scala
tar -zxf /opt/soft/scala-2.12.12.tgz -C /opt/apps/
echo "export SCALA_HOME=/opt/apps/scala-2.12.12" >> /etc/profile
echo 'export PATH=$PATH:$SCALA_HOME/bin' >> /etc/profile
source /etc/profile
echo "export SPARK_HOME=/opt/apps/spark-2.4.4" >> /etc/profile
echo 'export PATH=$PATH:$SPARK_HOME/sbin:$SPARK_HOME/bin' >> /etc/profile
source /etc/profile
echo "master" >> /opt/apps/spark-2.4.4/conf/slaves
echo "slave1" >> /opt/apps/spark-2.4.4/conf/slaves
echo "slave2" >> /opt/apps/spark-2.4.4/conf/slaves
vim conf/spark-env.sh
export YARN_CONF_DIR=/opt/apps/hadoop-3.1.1/etc/hadoop
export HADOOP_CONF_DIR=/opt/apps/hadoop-3.1.1/etc/hadoop
export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=18018 -Dspark.history.fs.logDirectory=hdfs:///user/spark/applicationHistory"
vim conf/spark-defaults.conf
spark.eventLog.dir=hdfs:///user/spark/applicationHistory
spark.eventLog.enabled=true
spark.yarn.historyServer.address=master:18018
在HIVE_HOME/bin目录下创建start-hive.sh
vim start-hive.sh
nohup hive --service metastore 1>/dev/null 2>&1 &
nohup hiveserver2 1>/dev/null 2>&1 &
nohup start-history-server.sh 1>/dev/null 2>&1 &
hdfs dfs -mkdir /user/spark/applicationHistory
安装KAFKA-MANAGER
yum -y install unzip
unzip kafka-manager-2.0.0.2.zip
mv kafka-manager-2.0.0.2 /opt/apps/
vim /opt/apps/kafka-manager-2.0.0.2/conf/application.conf
#kafka-manager.zkhosts="kafka-manager-zookeeper:2181"
kafka-manager.zkhosts="192.168.136.151:2181,192.168.136.152:2181,192.168.136.153:2181"
nohup /opt/apps/kafka-manager-2.0.0.2/bin/kafka-manager -Dconfig.file=/opt/apps/kafka-manager-2.0.0.2/conf/application.conf -Dhttp.port=9090 1>/dev/null 2>&1 &
安装Redis
yum install wget
# 更新gcc版本
yum install gcc-c++
yum -y install centos-release-scl
yum -y install devtoolset-9-gcc devtoolset-9-gcc-c++ devtoolset-9-binutils
scl enable devtoolset-9 bash
wget http://download.redis.io/releases/redis-6.0.0.tar.gz 或者 到官网下载
tar -zxvf /opt/soft/redis-6.0.0.tar.gz -C /opt/apps/
cd /opt/apps/redis-6.0.0
make
make install
cp /opt/apps/redis-6.0.0/redis.conf /usr/local/redis/bin
vim redis.conf
daemonize yes
protected-mode no
bind 0.0.0.0
后台启动
/usr/local/redis/bin/redis-server /usr/local/redis/bin/redis.conf
设置开机启动
vi /etc/systemd/system/redis.service
[Unit]
Description=redis-server
After=network.target
[Service]
Type=forking
ExecStart=/usr/local/redis/bin/redis-server /usr/local/redis/bin/redis.conf
PrivateTmp=true
[Install]
WantedBy=multi-user.target
systemctl daemon-reload
systemctl start redis.service
systemctl enable redis.service
ln -s /usr/local/redis/bin/redis-cli /usr/bin/redis
安装Clickhouse
yum install -y curl
curl -s https://packagecloud.io/install/repositories/altinity/clickhouse/script.rpm.sh | sudo bash
yum install -y clickhouse-server
yum install -y clickhouse-client
安装SQOOP
/opt/apps/sqoop-1.4.7
echo "export SQOOP_HOME=/opt/apps/sqoop-1.4.7" >> /etc/profile
echo 'export PATH=$PATH:$SQOOP_HOME/bin' >> /etc/profile
source /etc/profile
cp /opt/apps/sqoop-1.4.7/conf/sqoop-env-template.sh /opt/apps/sqoop-1.4.7/conf/sqoop-env.sh
echo 'export HADOOP_COMMON_HOME=/opt/apps/hadoop-3.1.1' >> sqoop-env.sh
echo 'export HADOOP_MAPRED_HOME=/opt/apps/hadoop-3.1.1' >> sqoop-env.sh
echo 'export HIVE_HOME=/opt/apps/hive-3.1.2' >> sqoop-env.sh
cp /opt/apps/hive-3.1.2/lib/mysql-connector-java-5.1.47-bin.jar /opt/apps/sqoop-1.4.7/lib/
# Azkaban 群起/群停脚本(web-server 与 executor 均部署在 slave2 节点)
# 用法: ./azkaban.sh start|stop
if [ $# -eq 0 ]; then
  echo "please input param: start stop"
elif [ "$1" = "start" ]; then
  ssh slave2 "source /etc/profile;/opt/apps/azkaban-2.5.0/server/bin/azkaban-web-start.sh;/opt/apps/azkaban-2.5.0/executor/bin/azkaban-executor-start.sh;"
elif [ "$1" = "stop" ]; then
  ssh slave2 "source /etc/profile;/opt/apps/azkaban-2.5.0/server/bin/azkaban-web-shutdown.sh;/opt/apps/azkaban-2.5.0/executor/bin/azkaban-executor-shutdown.sh;"
fi
azkaban-web-shutdown.sh
安装Hbase
tar -zxvf hbase-2.4.9-bin.tar.gz -C /opt/apps/
vim hbase-env.sh
export JAVA_HOME=/opt/apps/jdk1.8.0_191
export HBASE_CLASSPATH=/opt/apps/hbase-2.4.9/conf
export HBASE_MANAGES_ZK=false
vim regionservers
master
slave1
slave2
echo "export HBASE_HOME=/opt/apps/hbase-2.4.9" >> /etc/profile
echo 'export PATH=$PATH:$HBASE_HOME/bin' >> /etc/profile
source /etc/profile
开启hbase
start-hbase.sh
ip:16010访问
vim hbase-site.xml
<configuration>
<property>
<name>hbase.rootdir</name>
<value>hdfs://master:8020/hbase</value>
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>master,slave1,slave2</value>
</property>
<property>
<name>hbase.unsafe.stream.capability.enforce</name>
<value>false</value>
</property>
</configuration>
安装Flume
# 以下这些文件上传至 /opt/soft目录下
inpathToHdfs.properties
apache-flume-1.6.0-bin.tar.gz
flume-ng-core-1.6.0.jar
# 创建安装目录
mkdir -p /opt/apps
mkdir -p /opt/soft
# flume监控目录
mkdir -p /tmp/logs
# 以将flume安装包, 放到 /opt/soft 安装到 /opt/apps为例
cd /opt/soft
# 解压flume安装包
tar -zxvf apache-flume-1.6.0-bin.tar.gz
# 转移目录
mv /opt/soft/apache-flume-1.6.0-bin /opt/apps/apache-flume-1.6.0
# 添加jar包依赖
cd /opt/apps/apache-flume-1.6.0/lib
# 替换jar包名称
mv flume-ng-core-1.6.0.jar flume-ng-core-1.6.0.jar-bak
cp /opt/soft/flume-ng-core-1.6.0.jar /opt/apps/apache-flume-1.6.0/lib/
# 配置properties目录
cd /opt/apps/apache-flume-1.6.0
mkdir properties
cp /opt/soft/inpathToHdfs.properties /opt/apps/apache-flume-1.6.0/properties/
# 启动flume任务
cd /opt/apps/apache-flume-1.6.0
nohup bin/flume-ng agent -c conf -f properties/inpathToHdfs.properties -n a1 >/dev/null 2>&1 &
# 过1-2分钟查看 /opt/apps/apache-flume-1.6.0/logs目录是否存在
# Name the components on this agent
a1.sources = r1
a1.channels = c1
a1.sinks = k1
# Describe/configure the source
a1.sources.r1.type = spooldir
# 监控的目录
a1.sources.r1.spoolDir = /tmp/logs
#忽略所有以.tmp 结尾的文件,不上传
#a1.sources.r1.ignorePattern = ([^]*\.tmp)
#a1.sources.r1.basenameHeader = true
a1.sources.r1.fileHeader = true
a1.sources.r1.fileHeaderKey = fileName
# Describe the sink
a1.sinks.k1.type = hdfs
# 要上传至hdfs的目录
a1.sinks.k1.hdfs.path = hdfs://master:8020/flume/%Y%m%d/
# 是否按照时间滚动文件夹
a1.sinks.k1.hdfs.round = true
# 多久创建一个新的文件夹
a1.sinks.k1.hdfs.roundValue = 1
# 定义时间单位
a1.sinks.k1.hdfs.roundUnit = hour
# 是否使用本地时间戳(必须配置)
a1.sinks.k1.hdfs.useLocalTimeStamp = true
# 积累多少个Event才flush到HDFS一次(单位为事件)
a1.sinks.k1.hdfs.batchSize = 100
# 设置文件类型,可支持压缩
a1.sinks.k1.hdfs.fileType = DataStream
# 多久滚动生成一个新的文件(单位为秒)
a1.sinks.k1.hdfs.rollInterval = 30
# 设置每个文件的滚动大小(略小于文件块大小128M)
a1.sinks.k1.hdfs.rollSize = 134217700
# 文件的滚动与Event数量无关(0则不按照该值)
a1.sinks.k1.hdfs.rollCount = 0
# 设置文件前缀
a1.sinks.k1.hdfs.filePrefix = %{fileName}
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
# 配置namespace
redis集群安装
1、在/usr/local/创建redis-cluster目录
mkdir redis-cluster
2、在redis-cluster目录下创建节点目录
mkdir 7001 7002 7003 7004 7005 7006
3、复制一份redis.conf文件到7001节点目录
cp redis.conf ../redis-cluster/7001
4、修改redis-cluster/7001/redis.conf
bind 0.0.0.0
daemonize yes (设置后台运行redis)
cluster-enabled yes (开启集群)
cluster-node-timeout 15000 (设置请求超时时间,默认为15秒,可以自行修改)
appendonly yes (aop日志开启,会每次进行写操作都记录一条日志)
port 7001(和目录端口一致)
pidfile /var/run/redis_7001.pid
dbfilename dump_7001.rdb
appendfilename "appendonly_7001.aof"
cluster-config-file nodes_7001.conf
# 设置密码
masterauth 123456
requirepass 123456
5、修改完成后,把redis.conf 文件复制到其他节点中,并修改不同端口部分:
# 当前目录:redis-cluster/7001/
cp redis.conf ../7002/
cp redis.conf ../7003/
cp redis.conf ../7004/
cp redis.conf ../7005/
cp redis.conf ../7006/
6、编辑群起脚本 start-all.sh
#! /bin/bash
# Redis 集群启动脚本:依次进入 7001~7006 各节点目录并启动 redis-server
# 注:假设各端口目录内均存在 redis-server 可执行文件与对应 redis.conf —— 与原脚本一致
REDIS_HOME=/usr/local/redis-cluster
for port in 7001 7002 7003 7004 7005 7006; do
  cd "$REDIS_HOME/$port" && ./redis-server redis.conf
done
7、赋予权限
chmod 755 start-all.sh
注:查看进程是否启动,并确定端口:
ps -ef|grep redis|grep cluster
8、创建集群
./redis-cli -a 123456 --cluster create 192.168.136.152:7001 192.168.136.152:7002 192.168.136.152:7003 192.168.136.152:7004 192.168.136.152:7005 192.168.136.152:7006 --cluster-replicas 1
--cluster-replicas 1 :表示一个master挂载几个slave,至少需要3个master,此处设置为1 至少需要 6节点,如果设置为2 至少需要9个节点。
9、关闭集群
#! /bin/bash
# Redis 集群关闭脚本:逐节点 shutdown,并清理各节点生成的带端口后缀文件
# (dump_700X.rdb / appendonly_700X.aof / nodes_700X.conf / redis_700X.pid 等)
REDIS_HOME=/usr/local/redis-cluster
for port in 7001 7002 7003 7004 7005 7006; do
  cd "$REDIS_HOME/$port" && ./redis-cli -c -h 192.168.136.152 -p "$port" shutdown
done
# 原脚本用 rm -f `ls | grep _700X` 解析 ls 输出(文件名含空白字符时不安全),
# 改用通配符匹配,语义等价:删除文件名包含 _<端口号> 的文件
for port in 7001 7002 7003 7004 7005 7006; do
  cd "$REDIS_HOME/$port" && rm -f -- *_"$port"*
done