文章目录
flume安装
1、解压并改名
tar -zxf flume-ng-1.6.0-cdh5.14.0.tar.gz
mv apache-flume-1.6.0-cdh5.14.0-bin/ flume
2、修改文件
cd conf
cp flume-env.sh.template flume-env.sh
vi flume-env.sh
# jdk
export JAVA_HOME=/usr/java/jdk1.8.0_131
# JVM 堆内存与 JMX 参数
export JAVA_OPTS="-Xms10240m -Xmx10240m -Dcom.sun.management.jmxremote"
netcat
Netcat是一款非常出名的网络工具，简称“NC”，有渗透测试中的“瑞士军刀”之称。它可以用作端口监听、端口扫描、远程文件传输，还可以实现远程shell等功能。
netcat使用
https://blog.csdn.net/Yy10205473/article/details/112511239?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522162182175016780271538649%2522%252C%2522scm%2522%253A%252220140713.130102334..%2522%257D&request_id=162182175016780271538649&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~all~top_positive~default-1-112511239.first_rank_v2_pc_rank_v29&utm_term=netcat&spm=1018.2226.3001.4187
//安装nc
yum install -y nc
//安装telnet
yum install -y telnet.*
nc -lk 44444
telnet localhost 44444
channel类型为memory,source类型为netcat
使用flume实现
新建netcat-flume-logger.conf
a1.sources=r1
a1.sinks=k1
a1.channels=c1
a1.sources.r1.type=netcat
a1.sources.r1.bind=localhost
a1.sources.r1.port=44444
a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100
a1.sinks.k1.type=logger
a1.sources.r1.channels=c1
a1.sinks.k1.channel=c1
./bin/flume-ng agent --name a1 --conf ./conf/ --conf-file ./conf/kb11job/netcat-flume-logger.conf -Dflume.root.logger=INFO,console
source接收类型为exec
a1.sources=r1
a1.sinks=s1
a1.channels=c1
a1.sources.r1.type=exec
a1.sources.r1.command=tail -f /opt/flumelogfile/flumedemo.log
a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100
a1.sinks.s1.type=logger
a1.sources.r1.channels=c1
a1.sinks.s1.channel=c1
channel类型为file,source类型为spooldir
events.sources=eventsSource
events.channels=eventsChannel
events.sinks=eventsSink
events.sources.eventsSource.type=spooldir
events.sources.eventsSource.spoolDir=/opt/flumelogfile/events
events.sources.eventsSource.deserializer=LINE
events.sources.eventsSource.deserializer.maxLineLength=32000
events.sources.eventsSource.includePattern=events_[0-9]{4}-[0-9]{2}-[0-9]{2}.csv
events.channels.eventsChannel.type=file
events.channels.eventsChannel.checkpointDir=/opt/checkpoint/events
events.channels.eventsChannel.dataDirs=/opt/data/events
events.sinks.eventsSink.type=logger
events.sources.eventsSource.channels=eventsChannel
events.sinks.eventsSink.channel=eventsChannel
./bin/flume-ng agent --name events --conf ./conf/ --conf-file ./conf/kb11job/events-flume-logger.conf -Dflume.root.logger=INFO,console
sink 将文件存放在hdfs中
hdfs dfsadmin -safemode leave
hdfs dfs -mkdir -p /kb11file/userfriend
user_friend.sources=userfriendSource
user_friend.channels=userfriendChannel
user_friend.sinks=userfriendSink
user_friend.sources.userfriendSource.type=spooldir
user_friend.sources.userfriendSource.spoolDir=/opt/flumelogfile/events
user_friend.sources.userfriendSource.deserializer=LINE
user_friend.sources.userfriendSource.deserializer.maxLineLength=32000
user_friend.sources.userfriendSource.includePattern=user_friends-[0-9]{4}-[0-9]{2}-[0-9]{2}.csv
user_friend.channels.userfriendChannel.type=file
user_friend.channels.userfriendChannel.checkpointDir=/opt/checkpoint/user_friend
user_friend.channels.userfriendChannel.dataDirs=/opt/data/user_friend
user_friend.sinks.userfriendSink.type=hdfs
user_friend.sinks.userfriendSink.hdfs.fileType=DataStream
user_friend.sinks.userfriendSink.hdfs.filePrefix=userfriend
user_friend.sinks.userfriendSink.hdfs.fileSuffix=.csv
user_friend.sinks.userfriendSink.hdfs.path=hdfs://192.168.232.211:9000/kb11file/userfriend/%Y-%m-%d
user_friend.sinks.userfriendSink.hdfs.useLocalTimeStamp=true
user_friend.sinks.userfriendSink.hdfs.batchSize=640
user_friend.sinks.userfriendSink.hdfs.rollCount=0
user_friend.sinks.userfriendSink.hdfs.rollSize=6400000
user_friend.sinks.userfriendSink.hdfs.rollInterval=30
user_friend.sources.userfriendSource.channels=userfriendChannel
user_friend.sinks.userfriendSink.channel=userfriendChannel
./bin/flume-ng agent --name user_friend --conf ./conf/ --conf-file ./conf/kb11job/user_friends-hdfs.conf -Dflume.root.logger=INFO,console