# event_attendees.conf — Flume agent a1: spooldir source -> file channel -> Kafka topic "event_attendees"

a1.sources = s1
a1.channels = c1
a1.sinks = k1

# Spooling-directory source; the regex_filter interceptor with excludeEvents=true
# drops lines matching the pattern (presumably the CSV header row — verify against the data).
a1.sources.s1.type = spooldir
a1.sources.s1.channels = c1
a1.sources.s1.spoolDir = /opt/data/event_attendees
# event_attendees rows can be very wide; raise the per-line limit (Flume default is 2048)
a1.sources.s1.deserializer.maxLineLength = 120000
a1.sources.s1.interceptors = i1
a1.sources.s1.interceptors.i1.type = regex_filter
a1.sources.s1.interceptors.i1.regex = \s*event.*
a1.sources.s1.interceptors.i1.excludeEvents = true

# Durable file channel.
# NOTE: every agent must have its OWN checkpointDir/dataDirs — the file channel
# locks these directories, so sharing them across agents fails/corrupts the channel.
# A per-topic subdirectory keeps this agent isolated from a2..a5.
a1.channels.c1.type = file
a1.channels.c1.checkpointDir = /opt/flumekfk/event_attendees/checkpoint
a1.channels.c1.dataDirs = /opt/flumekfk/event_attendees/data

# Kafka sink
a1.sinks.k1.channel = c1
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.kafka.topic = event_attendees
a1.sinks.k1.kafka.bootstrap.servers = 192.168.59.130:9092
a1.sinks.k1.kafka.flumeBatchSize = 20
a1.sinks.k1.kafka.producer.acks = 1
a1.sinks.k1.kafka.producer.linger.ms = 10
a1.sinks.k1.kafka.producer.batch.size = 524288

# Operational commands (run these in a shell; they are not Flume properties):
# kafka-topics.sh --zookeeper 192.168.59.130:2181 --create --topic event_attendees --replication-factor 1 --partitions 1
# flume-ng agent -n a1 -c /opt/software/hadoop/flume160/conf -f /opt/flumecfg/event_attendees.conf -Dflume.root.logger=INFO,console
# kafka-console-consumer.sh --bootstrap-server 192.168.59.130:9092 --topic event_attendees --from-beginning
# kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list 192.168.59.130:9092 --topic event_attendees --time -1
# events.conf — Flume agent a2: spooldir source -> file channel -> Kafka topic "events"

a2.sources = s2
a2.channels = c2
a2.sinks = k2

# Spooling-directory source; the regex_filter interceptor with excludeEvents=true
# drops lines matching the pattern (presumably the CSV header row starting with event_id).
a2.sources.s2.type = spooldir
a2.sources.s2.channels = c2
a2.sources.s2.spoolDir = /opt/data/events
a2.sources.s2.interceptors = i2
a2.sources.s2.interceptors.i2.type = regex_filter
a2.sources.s2.interceptors.i2.regex = \s*event_id.*
a2.sources.s2.interceptors.i2.excludeEvents = true

# Durable file channel.
# NOTE: every agent must have its OWN checkpointDir/dataDirs — the file channel
# locks these directories, so sharing them across agents fails/corrupts the channel.
a2.channels.c2.type = file
a2.channels.c2.checkpointDir = /opt/flumekfk/events/checkpoint
a2.channels.c2.dataDirs = /opt/flumekfk/events/data

# Kafka sink
a2.sinks.k2.channel = c2
a2.sinks.k2.type = org.apache.flume.sink.kafka.KafkaSink
a2.sinks.k2.kafka.topic = events
a2.sinks.k2.kafka.bootstrap.servers = 192.168.59.130:9092
a2.sinks.k2.kafka.flumeBatchSize = 20
a2.sinks.k2.kafka.producer.acks = 1
a2.sinks.k2.kafka.producer.linger.ms = 10

# Operational commands (run these in a shell; they are not Flume properties):
# Fixed: -f pointed at event.conf, but this file is events.conf;
# the consumer's bootstrap-server was 192.168.181.132, unlike every other command here.
# kafka-topics.sh --zookeeper 192.168.59.130:2181 --create --topic events --replication-factor 1 --partitions 1
# flume-ng agent -n a2 -c /opt/software/hadoop/flume160/conf -f /opt/flumecfg/events.conf -Dflume.root.logger=INFO,console
# kafka-console-consumer.sh --bootstrap-server 192.168.59.130:9092 --topic events --from-beginning
# kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list 192.168.59.130:9092 --topic events --time -1
# users.conf — Flume agent a3: spooldir source -> file channel -> Kafka topic "users"

a3.sources = s3
a3.channels = c3
a3.sinks = k3

# Spooling-directory source; the regex_filter interceptor with excludeEvents=true
# drops lines matching the pattern (presumably the CSV header row starting with user_id).
a3.sources.s3.type = spooldir
a3.sources.s3.channels = c3
a3.sources.s3.spoolDir = /opt/data/users
a3.sources.s3.interceptors = i3
a3.sources.s3.interceptors.i3.type = regex_filter
a3.sources.s3.interceptors.i3.regex = \s*user_id.*
a3.sources.s3.interceptors.i3.excludeEvents = true

# Durable file channel.
# NOTE: every agent must have its OWN checkpointDir/dataDirs — the file channel
# locks these directories, so sharing them across agents fails/corrupts the channel.
a3.channels.c3.type = file
a3.channels.c3.checkpointDir = /opt/flumekfk/users/checkpoint
a3.channels.c3.dataDirs = /opt/flumekfk/users/data

# Kafka sink
a3.sinks.k3.channel = c3
a3.sinks.k3.type = org.apache.flume.sink.kafka.KafkaSink
a3.sinks.k3.kafka.topic = users
a3.sinks.k3.kafka.bootstrap.servers = 192.168.59.130:9092
a3.sinks.k3.kafka.flumeBatchSize = 20
a3.sinks.k3.kafka.producer.acks = 1
a3.sinks.k3.kafka.producer.linger.ms = 10

# Operational commands (run these in a shell; they are not Flume properties):
# kafka-topics.sh --zookeeper 192.168.59.130:2181 --create --topic users --replication-factor 1 --partitions 1
# flume-ng agent -n a3 -c /opt/software/hadoop/flume160/conf -f /opt/flumecfg/users.conf -Dflume.root.logger=INFO,console
# kafka-console-consumer.sh --bootstrap-server 192.168.59.130:9092 --topic users --from-beginning
# kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list 192.168.59.130:9092 --topic users --time -1
# user_friends.conf — Flume agent a4: spooldir source -> file channel -> Kafka topic "user_friends"

a4.sources = s4
a4.channels = c4
a4.sinks = k4

# Spooling-directory source; the regex_filter interceptor with excludeEvents=true
# drops lines matching the pattern (presumably the CSV header row — verify against the data).
a4.sources.s4.type = spooldir
a4.sources.s4.channels = c4
a4.sources.s4.spoolDir = /opt/data/user_friends
a4.sources.s4.interceptors = i4
a4.sources.s4.interceptors.i4.type = regex_filter
a4.sources.s4.interceptors.i4.regex = \s*user.*
a4.sources.s4.interceptors.i4.excludeEvents = true
# user_friends rows can be very wide; raise the per-line limit (Flume default is 2048)
a4.sources.s4.deserializer.maxLineLength = 60000

# Durable file channel.
# NOTE: every agent must have its OWN checkpointDir/dataDirs — the file channel
# locks these directories, so sharing them across agents fails/corrupts the channel.
a4.channels.c4.type = file
a4.channels.c4.checkpointDir = /opt/flumekfk/user_friends/checkpoint
a4.channels.c4.dataDirs = /opt/flumekfk/user_friends/data

# Kafka sink
a4.sinks.k4.channel = c4
a4.sinks.k4.type = org.apache.flume.sink.kafka.KafkaSink
a4.sinks.k4.kafka.topic = user_friends
a4.sinks.k4.kafka.bootstrap.servers = 192.168.59.130:9092
a4.sinks.k4.kafka.flumeBatchSize = 20
a4.sinks.k4.kafka.producer.acks = 1
a4.sinks.k4.kafka.producer.linger.ms = 10

# Operational commands (run these in a shell; they are not Flume properties):
# kafka-topics.sh --zookeeper 192.168.59.130:2181 --create --topic user_friends --replication-factor 1 --partitions 1
# flume-ng agent -n a4 -c /opt/software/hadoop/flume160/conf -f /opt/flumecfg/user_friends.conf -Dflume.root.logger=INFO,console
# kafka-console-consumer.sh --bootstrap-server 192.168.59.130:9092 --topic user_friends --from-beginning
# kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list 192.168.59.130:9092 --topic user_friends --time -1
# train.conf — Flume agent a5: spooldir source -> file channel -> Kafka topic "train"

a5.sources = s5
a5.channels = c5
a5.sinks = k5

# Spooling-directory source; the regex_filter interceptor with excludeEvents=true
# drops lines matching the pattern (presumably the CSV header row — verify against the data).
a5.sources.s5.type = spooldir
a5.sources.s5.channels = c5
a5.sources.s5.spoolDir = /opt/data/train
a5.sources.s5.interceptors = i5
a5.sources.s5.interceptors.i5.type = regex_filter
a5.sources.s5.interceptors.i5.regex = \s*user.*
a5.sources.s5.interceptors.i5.excludeEvents = true

# Durable file channel.
# NOTE: every agent must have its OWN checkpointDir/dataDirs — the file channel
# locks these directories, so sharing them across agents fails/corrupts the channel.
a5.channels.c5.type = file
a5.channels.c5.checkpointDir = /opt/flumekfk/train/checkpoint
a5.channels.c5.dataDirs = /opt/flumekfk/train/data

# Kafka sink
a5.sinks.k5.channel = c5
a5.sinks.k5.type = org.apache.flume.sink.kafka.KafkaSink
a5.sinks.k5.kafka.topic = train
a5.sinks.k5.kafka.bootstrap.servers = 192.168.59.130:9092
a5.sinks.k5.kafka.flumeBatchSize = 20
a5.sinks.k5.kafka.producer.acks = 1
a5.sinks.k5.kafka.producer.linger.ms = 10

# Operational commands (run these in a shell; they are not Flume properties):
# kafka-topics.sh --zookeeper 192.168.59.130:2181 --create --topic train --replication-factor 1 --partitions 1
# flume-ng agent -n a5 -c /opt/software/hadoop/flume160/conf -f /opt/flumecfg/train.conf -Dflume.root.logger=INFO,console
# kafka-console-consumer.sh --bootstrap-server 192.168.59.130:9092 --topic train --from-beginning
# kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list 192.168.59.130:9092 --topic train --time -1
# test.conf — Flume agent a6: spooldir source -> file channel -> Kafka topic "test"

a6.sources = s6
a6.channels = c6
a6.sinks = k6

# Spooling-directory source; the regex_filter interceptor with excludeEvents=true
# drops lines matching the pattern (presumably the CSV header row — verify against the data).
a6.sources.s6.type = spooldir
a6.sources.s6.channels = c6
a6.sources.s6.spoolDir = /opt/data/test
a6.sources.s6.interceptors = i6
a6.sources.s6.interceptors.i6.type = regex_filter
a6.sources.s6.interceptors.i6.regex = \s*user.*
a6.sources.s6.interceptors.i6.excludeEvents = true

# Durable file channel.
# Moved off /root/flume/* to follow the same per-topic layout as the sibling agents;
# each agent keeps its own exclusive checkpointDir/dataDirs (file channel locks them).
a6.channels.c6.type = file
a6.channels.c6.checkpointDir = /opt/flumekfk/test/checkpoint
a6.channels.c6.dataDirs = /opt/flumekfk/test/data

# Kafka sink
a6.sinks.k6.channel = c6
a6.sinks.k6.type = org.apache.flume.sink.kafka.KafkaSink
a6.sinks.k6.kafka.topic = test
a6.sinks.k6.kafka.bootstrap.servers = 192.168.59.130:9092
a6.sinks.k6.kafka.flumeBatchSize = 20
a6.sinks.k6.kafka.producer.acks = 1
a6.sinks.k6.kafka.producer.linger.ms = 10

# Operational commands (run these in a shell; they are not Flume properties):
# kafka-topics.sh --zookeeper 192.168.59.130:2181 --create --topic test --replication-factor 1 --partitions 1
# flume-ng agent -n a6 -c /opt/software/hadoop/flume160/conf -f /opt/flumecfg/test.conf -Dflume.root.logger=INFO,console
# kafka-console-consumer.sh --bootstrap-server 192.168.59.130:9092 --topic test --from-beginning
# kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list 192.168.59.130:9092 --topic test --time -1