1、多机同时执行(xcall.sh)
#! /bin/bash
# xcall.sh — run one command on every cluster node over ssh.
# Usage: xcall.sh <command ...>   e.g.  xcall.sh jps
if [ $# -eq 0 ]; then
  echo "请追加参数!(例如:jps)"
  exit 1
fi
for hostname in hadoop11 hadoop12 hadoop13; do
  echo "============================【$hostname】============================"
  # "$*" joins all arguments into a single remote command string.
  # (The original "$@" inside quotes splits the command across ssh's argv;
  # ssh re-joins with spaces, but $* is the correct, unambiguous idiom.)
  # source /etc/profile: ssh runs a non-login shell, so PATH must be loaded
  # for tools like jps to resolve.
  ssh "root@$hostname" "source /etc/profile;$*"
done
2、分发脚本(myscp.sh)
#!/bin/bash
# myscp.sh — distribute a file or directory to the other cluster nodes,
# placing it at the same absolute directory path on each target host.
# Usage: myscp.sh /absolute/path/to/file
# NOTE: the original first line was "# !/bin/bash" — the space after '#'
# makes it an ordinary comment, not a shebang; fixed here.
if (( $# == 0 )); then
  echo "请追加文件绝对路径!"
  exit 1
fi
pname=$1
user=$(whoami)
# Resolve the containing directory to an absolute path (works even if the
# caller passed a relative path).
b=$(cd "$(dirname "$pname")" && pwd)
for host in hadoop12 hadoop13; do
  # Echo the command first so the operator can see what is being copied where.
  echo "scp -r $pname $user@$host:$b"
  scp -r "$pname" "$user@$host:$b"
done
3、zookeeper启动、关闭、状态
#! /bin/bash
# zk — start / stop / check ZooKeeper on all cluster nodes.
# Usage: zk <start|stop|status>
if [ $# -eq 0 ]; then
  echo "请追加参数(start or stop or status)"
  exit 1
fi
# If bash reports '\r' errors, the file has CRLF line endings; fix with:
#   sed -i 's/\r$//' zk
case $1 in
"start")
  for hostname in hadoop11 hadoop12 hadoop13; do
    echo "====================【$hostname已启动zk】===================="
    # source /etc/profile so zkServer.sh is on PATH in the non-login shell.
    ssh "root@$hostname" "source /etc/profile;zkServer.sh start"
  done
  ;;
"stop")
  for hostname in hadoop11 hadoop12 hadoop13; do
    echo "====================【$hostname已关闭zk】===================="
    ssh "root@$hostname" "source /etc/profile;zkServer.sh stop"
  done
  ;;
"status")
  for hostname in hadoop11 hadoop12 hadoop13; do
    echo "====================【$hostname的状态zk】===================="
    ssh "root@$hostname" "source /etc/profile;zkServer.sh status"
  done
  ;;
*)
  # Reject unknown arguments instead of silently doing nothing.
  echo "请追加参数(start or stop or status)"
  exit 1
  ;;
esac
4、kafka启动、关闭
#! /bin/bash
# kafka — start / stop Kafka (0.11) on all cluster nodes.
# Usage: kafka <start|stop>
if [ $# -eq 0 ]; then
  echo "请追加参数(start or stop)"
  exit 1
fi
# If bash reports '\r' errors, the file has CRLF line endings; fix with:
#   sed -i 's/\r$//' kafka
case $1 in
"start")
  for hostname in hadoop11 hadoop12 hadoop13; do
    echo "====================【$hostname已启动kafka】===================="
    # ZooKeeper is started first on the same node before the Kafka broker.
    ssh "root@$hostname" "source /etc/profile;zkServer.sh start;/opt/installs/kafka0.11/bin/kafka-server-start.sh -daemon /opt/installs/kafka0.11/config/server.properties"
  done
  ;;
"stop")
  for hostname in hadoop11 hadoop12 hadoop13; do
    echo "====================【$hostname已关闭kafka】===================="
    ssh "root@$hostname" "source /etc/profile;/opt/installs/kafka0.11/bin/kafka-server-stop.sh stop"
  done
  ;;
*)
  # Reject unknown arguments instead of silently doing nothing.
  echo "请追加参数(start or stop)"
  exit 1
  ;;
esac
5、执行jar包生成数据+定时器(java-data.sh + crontab)
#! /bin/bash
# java-data.sh — run the log-collector jar to generate data.
# Intended to be invoked by cron (see crontab entry below), so /etc/profile
# is sourced explicitly to pick up JAVA_HOME/PATH in cron's bare environment.
source /etc/profile
jar_file=~/log-collector-1.0-SNAPSHOT-jar-with-dependencies.jar
java -jar "$jar_file"
-----------------------------------------------------------------------
crontab -e
#45 9 * * * java -jar ~/log-collector-1.0-SNAPSHOT-jar-with-dependencies.jar
#0 9 * * * /opt/installs/jdk1.8/bin/java -jar ~/log-collector-1.0-SNAPSHOT-jar-with-dependencies.jar
0 9 * * * sh /usr/local/bin/java-data.sh
6、flume数据采集(f1.sh 本地到kafka)
#! /bin/bash
# f1.sh — start/stop the Flume agent that tails local log files into Kafka.
# Usage: f1.sh <start|stop>
case $1 in
"start")
  # NB: the trailing backslash is required — as originally written (no
  # continuation) "--conf-file ..." was executed as a separate command and
  # the agent started without its configuration.
  nohup flume-ng agent --conf /opt/installs/flume1.9/conf --name a1 \
    --conf-file /opt/installs/flume1.9/job/mall-project/taildir-kafka.conf >/opt/installs/flume1.9/out.txt 2>&1 &
  ;;
"stop")
  # Find the agent by its config-file name in the process list and kill it.
  #ps -ef | grep Application | grep -v grep | awk '{print $2}' | xargs kill -9
  ps -ef | grep taildir-kafka.conf | grep -v grep | awk '{print $2}' | xargs kill -9
  ;;
*)
  # Typo fixed: was "stor".
  echo "arg is start or stop"
  ;;
esac
# Flume agent a1: tail local app logs and route them into two Kafka topics.
# Source r1 (TAILDIR) fans out into channels c1/c2 via a multiplexing selector.
a1.sources = r1
a1.channels = c1 c2
# TAILDIR tracks file offsets, so restarts resume where they left off.
a1.sources.r1.type = TAILDIR
a1.sources.r1.filegroups = f1
# Filename part is a regex: matches files like app*.log under /tmp/logs.
a1.sources.r1.filegroups.f1 = /tmp/logs/^app.+.log$
# Custom interceptor: sets the "topic" header used by the selector below.
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = com.yh.interceptor.MallInterceptor$Builder
# Multiplexing selector: route by the "topic" header value.
a1.sources.r1.selector.type = multiplexing
a1.sources.r1.selector.header = topic
a1.sources.r1.selector.mapping.topic_start = c1
a1.sources.r1.selector.mapping.topic_event = c2
# Kafka channels — events go straight to Kafka, no separate sink needed.
a1.channels.c1.type = org.apache.flume.channel.kafka.KafkaChannel
a1.channels.c1.kafka.bootstrap.servers = hadoop10:9092
a1.channels.c1.kafka.topic = topic_start
# parseAsFlumeEvent=false: write the raw event body, not Flume's Avro wrapper.
a1.channels.c1.parseAsFlumeEvent = false
a1.channels.c2.type = org.apache.flume.channel.kafka.KafkaChannel
a1.channels.c2.kafka.bootstrap.servers = hadoop10:9092
a1.channels.c2.kafka.topic = topic_event
a1.channels.c2.parseAsFlumeEvent = false
a1.sources.r1.channels = c1 c2
7、flume数据采集(f2.sh kafka到hdfs)
#! /bin/bash
# f2.sh — start/stop the Flume agent that moves data from Kafka to HDFS.
# Usage: f2.sh <start|stop>
case $1 in
"start")
  # NB: the trailing backslash is required — as originally written (no
  # continuation) "--conf-file ..." was executed as a separate command and
  # the agent started without its configuration.
  nohup flume-ng agent --conf /opt/installs/flume1.9/conf --name a1 \
    --conf-file /opt/installs/flume1.9/job/mall-project/kafka-memory-hdfs.conf >/opt/installs/flume1.9/out2.txt 2>&1 &
  ;;
"stop")
  # Find the agent by its config-file name in the process list and kill it.
  #ps -ef | grep Application | grep -v grep | awk '{print $2}' | xargs kill -9
  ps -ef | grep kafka-memory-hdfs.conf | grep -v grep | awk '{print $2}' | xargs kill -9
  ;;
*)
  # Typo fixed: was "stor".
  echo "arg is start or stop"
  ;;
esac
# Flume agent a1: consume two Kafka topics and land each in its own
# date-partitioned HDFS directory (topic_start -> k1, topic_event -> k2).
a1.sources = r1 r2
a1.channels = c1 c2
a1.sinks = k1 k2
# Kafka source for the startup-log topic.
a1.sources.r1.type = org.apache.flume.source.kafka.KafkaSource
a1.sources.r1.kafka.bootstrap.servers = hadoop10:9092
a1.sources.r1.kafka.topics = topic_start
# earliest: on first run (no committed offset) read the topic from the beginning.
a1.sources.r1.kafka.consumer.auto.offset.reset=earliest
a1.sources.r1.kafka.consumer.group.id=G11_Start
# Kafka source for the event-log topic.
a1.sources.r2.type = org.apache.flume.source.kafka.KafkaSource
a1.sources.r2.kafka.bootstrap.servers = hadoop10:9092
a1.sources.r2.kafka.topics = topic_event
a1.sources.r2.kafka.consumer.auto.offset.reset=earliest
a1.sources.r2.kafka.consumer.group.id=G22_Start
# In-memory channels; events are lost if the agent dies before the sink drains them.
a1.channels.c1.type = memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=1000
a1.channels.c2.type=memory
a1.channels.c2.capacity=1000
a1.channels.c2.transactionCapacity=1000
# HDFS sink for topic_start, partitioned by day via %Y-%m-%d in the path.
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://hadoop10:9000/origin_data/gmall/log/topic_start/%Y-%m-%d
a1.sinks.k1.hdfs.filePrefix = logstart-
# useLocalTimeStamp: substitute path escapes from the agent's clock
# instead of requiring a timestamp header on each event.
a1.sinks.k1.hdfs.useLocalTimeStamp = true
# Roll files every 30s or at 1 MiB, whichever first; rollCount=0 disables
# rolling by event count.
a1.sinks.k1.hdfs.rollInterval = 30
a1.sinks.k1.hdfs.rollSize = 1048576
a1.sinks.k1.hdfs.rollCount = 0
# DataStream: write plain text, not SequenceFile.
a1.sinks.k1.hdfs.fileType = DataStream
# HDFS sink for topic_event — same rolling policy as k1.
a1.sinks.k2.type = hdfs
a1.sinks.k2.hdfs.path = hdfs://hadoop10:9000/origin_data/gmall/log/topic_event/%Y-%m-%d
a1.sinks.k2.hdfs.filePrefix = logevent-
a1.sinks.k2.hdfs.useLocalTimeStamp = true
a1.sinks.k2.hdfs.rollInterval = 30
a1.sinks.k2.hdfs.rollSize = 1048576
a1.sinks.k2.hdfs.rollCount = 0
a1.sinks.k2.hdfs.fileType = DataStream
# Wiring: r1 -> c1 -> k1, r2 -> c2 -> k2.
a1.sources.r1.channels = c1
a1.sources.r2.channels = c2
a1.sinks.k1.channel = c1
a1.sinks.k2.channel = c2