将数据建立软链接,并同步到文件夹里,供flume采集
#!/bin/bash
# Symlink the current hour's response log into the Flume monitor directory
# so the spooldir source can ingest it. Flume renames a fully ingested file
# by appending ".OK" (see fileSuffix in the agent config), hence:
#   - link and link.OK both present   -> already ingested, nothing to do
#   - neither present + source exists -> create the symlink
#   - link present but no .OK yet     -> ingestion in progress, leave alone

# Current hour, e.g. 2024-01-31-13; matches the app's hourly log file name.
mydate=$(date +%Y-%m-%d-%H)
#mydate=$(date -d "yesterday" +%Y%m%d)

# Directory watched by Flume, and directory where the app writes its logs.
monitorDir="/data/webapp/d5wx-webapp/logs/monitor/"
filePath="/data/webapp/d5wx-webapp/logs/response/"
fileName="response-${mydate}.log"

echo "文件地址:${filePath}"
echo "文件名字:${fileName}"

# Name the two paths once instead of re-concatenating them in every test.
src="${filePath}${fileName}"
link="${monitorDir}${fileName}"

if [[ -f "${link}" && -f "${link}.OK" ]]; then
  echo "---软连接文件存在---"
elif [[ ! -f "${link}" && ! -f "${link}.OK" && -f "${src}" ]]; then
  echo "---软链接不存在---"
  ln -s "${src}" "${link}"
fi
exit
在生产服务器上的flume配置文件,将日志信息发送到kafka的上
############################################
# Flume agent a1 (producer side): watch a spool directory and
# publish each ingested file's events to Kafka.
# a1 = agent name; r1 = source, c1 = channel, k1 = sink.
############################################
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Spooling-directory source: ingest new files dropped into spoolDir.
# Completed files are renamed with the ".OK" suffix and never deleted.
# NOTE(review): this path differs from the monitorDir used by the link
# script above (/data/webapp/d5wx-webapp/logs/monitor) — confirm which
# deployment this config belongs to.
a1.sources.r1.type = spooldir
a1.sources.r1.spoolDir =/data/adver-webapp/adver-webapp/logs/monitor
a1.sources.r1.fileSuffix = .OK
a1.sources.r1.deletePolicy = never
a1.sources.r1.fileHeader = true
# Kafka sink: publish events to the "adver" topic, ack after leader write
# (requiredAcks=1), 20 events per producer batch.
# NOTE(review): port 44444 is unusual for a Kafka broker (default 9092) —
# verify against the broker's listener config.
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.topic = adver
a1.sinks.k1.brokerList =192.168.152.34:44444
a1.sinks.k1.requiredAcks = 1
a1.sinks.k1.batchSize = 20
# Memory channel: in-memory buffer between source and sink.
a1.channels.c1.type = memory
# Maximum number of events held in the channel.
a1.channels.c1.capacity = 1000
# Maximum number of events handled per transaction.
a1.channels.c1.transactionCapacity = 100
# Wire the source and the sink to the same channel.
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
从kafka的主题topic采集信息,置于hdfs
# Flume agent a1 (consumer side): read events from the Kafka "adver"
# topic and write them to HDFS.
a1.sources = r1
a1.channels = ch1
a1.sinks = s1
# Kafka source.
# NOTE(review): zookeeperConnect is the legacy (Flume <= 1.6, old
# consumer API) setting; newer Flume versions use
# a1.sources.r1.kafka.bootstrap.servers — confirm the Flume version.
a1.sources.r1.type=org.apache.flume.source.kafka.KafkaSource
a1.sources.r1.zookeeperConnect=127.0.0.1:2181
a1.sources.r1.topic=adver
#agent.sources.kafkaSource.groupId=flume
a1.sources.r1.kafka.consumer.timeout.ms=100
# Memory channel: up to 1000 buffered events, 100 per transaction.
a1.channels.ch1.type=memory
a1.channels.ch1.capacity=1000
a1.channels.ch1.transactionCapacity=100
# HDFS sink: write plain-text stream files, bucketed by date (%Y%m%d).
a1.sinks.s1.type=hdfs
a1.sinks.s1.hdfs.path=hdfs://192.168.152.34:8020/foriseholdings/Algorithm/applogs/1000/%Y%m%d
a1.sinks.s1.hdfs.writeFormat=Text
a1.sinks.s1.hdfs.fileType=DataStream
# Round timestamps down to 3-minute boundaries for path bucketing;
# use the agent's local clock rather than an event-header timestamp.
a1.sinks.s1.hdfs.round = true
a1.sinks.s1.hdfs.roundValue = 3
a1.sinks.s1.hdfs.roundUnit = minute
a1.sinks.s1.hdfs.useLocalTimeStamp = true
# Roll files by size only (128 MB); rollCount=0 disables count-based rolls.
a1.sinks.s1.hdfs.rollCount = 0
a1.sinks.s1.hdfs.rollSize = 134217728
# Bind source and sink to the channel.
a1.sources.r1.channels = ch1
a1.sinks.s1.channel = ch1