Linux
Flume
userfriend-flume-kafka.conf
# Flume agent "user_friend": declares one source, one channel and one sink.
user_friend.sources=userFriendSource
user_friend.channels=userFriendChannel
user_friend.sinks=userFriendSink
# Source: spooling-directory source reading files dropped into spoolDir.
user_friend.sources.userFriendSource.type=spooldir
user_friend.sources.userFriendSource.spoolDir=/opt/flume/conf/jobkb09/dataSourceFile/userFriend1
# LINE deserializer with the maximum line length set to 320000.
user_friend.sources.userFriendSource.deserializer=LINE
user_friend.sources.userFriendSource.deserializer.maxLineLength=320000
# Only files whose names match this pattern (userfriend_<4 digits>-<2 digits>-<2 digits>.csv) are picked up.
user_friend.sources.userFriendSource.includePattern=userfriend_[0-9]{4}-[0-9]{2}-[0-9]{2}.csv
# Interceptor: regex_filter with excludeEvents=true, i.e. events matching the regex are excluded.
# Presumably this skips the CSV header line - confirm against the input files.
# NOTE(review): "^user*" means "use" followed by zero or more "r"; "^user.*" may have been intended.
user_friend.sources.userFriendSource.interceptors=head_filter
user_friend.sources.userFriendSource.interceptors.head_filter.type=regex_filter
user_friend.sources.userFriendSource.interceptors.head_filter.regex=^user*
user_friend.sources.userFriendSource.interceptors.head_filter.excludeEvents=true
# Channel: file channel with its checkpoint and data directories.
user_friend.channels.userFriendChannel.type=file
user_friend.channels.userFriendChannel.checkpointDir=/opt/flume/conf/jobkb09/checkPointFile/userFriend1
user_friend.channels.userFriendChannel.dataDirs=/opt/flume/conf/jobkb09/dataChannelFile/userFriend1
# Sink: Kafka sink writing to topic user_friends_raw on broker 192.168.134.150:9092, batch size 640.
user_friend.sinks.userFriendSink.type=org.apache.flume.sink.kafka.KafkaSink
user_friend.sinks.userFriendSink.batchSize=640
user_friend.sinks.userFriendSink.brokerList=192.168.134.150:9092
user_friend.sinks.userFriendSink.topic=user_friends_raw
# Wiring: the source writes to, and the sink reads from, userFriendChannel.
user_friend.sources.userFriendSource.channels=userFriendChannel
user_friend.sinks.userFriendSink.channel=userFriendChannel
在Flume的安装目录下执行,具体文件路径因人而异
./bin/flume-ng agent -n user_friend -c ./conf/ -f ./conf/jobkb09/userfriend-flume-kafka.conf -Dflume.root.logger=INFO,console
Kafka
kafka-console-consumer.sh --topic user_friends --bootstrap-server localhost:9092 --from-beginning
kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list 192.168.134.150:9092 --topic user_friends -time -1 --offsets 1
#user_friends:0:30386403
Java API
package kafka;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
/**
 * Kafka Streams job that flattens raw user/friends records.
 *
 * <p>Reads lines of the form {@code userId,friend1 friend2 ...} from the
 * {@code user_friends_raw} topic and writes one {@code userId,friendId}
 * record (with a null key) per friend to the {@code user_friends} topic.
 * Lines that do not consist of exactly two comma-separated fields are dropped.
 */
public class UserFriendStream {

    /** Topic the raw lines are consumed from. */
    private static final String SOURCE_TOPIC = "user_friends_raw";

    /** Topic the flattened user/friend pairs are produced to. */
    private static final String TARGET_TOPIC = "user_friends";

    /** Separator between friend ids; compiled once instead of once per record. */
    private static final java.util.regex.Pattern WHITESPACE =
            java.util.regex.Pattern.compile("\\s+");

    /**
     * Builds the topology, starts the streams application and blocks until the
     * JVM is asked to shut down.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "UserFriendStream2");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.134.150:9092");
        // Optional settings, left disabled as in the original notes:
        // props.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG,3000);
        // props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");
        // props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,false);
        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        StreamsBuilder builder = new StreamsBuilder();
        // Explicit <String, String> so the lambda receives Strings rather than Objects.
        builder.<String, String>stream(SOURCE_TOPIC)
                .flatMap((key, line) -> {
                    List<KeyValue<String, String>> records = new ArrayList<>();
                    for (String pair : toUserFriendPairs(line)) {
                        // Output records carry no key, only the "user,friend" value.
                        records.add(new KeyValue<String, String>(null, pair));
                    }
                    return records;
                })
                .to(TARGET_TOPIC);

        final Topology topo = builder.build();
        final KafkaStreams streams = new KafkaStreams(topo, props);
        final CountDownLatch latch = new CountDownLatch(1);

        // Close the streams client on JVM shutdown (e.g. Ctrl-C) and release main().
        Runtime.getRuntime().addShutdownHook(new Thread("user-friend-stream-shutdown-hook") {
            @Override
            public void run() {
                streams.close();
                latch.countDown();
            }
        });

        try {
            streams.start();
            latch.await();
        } catch (Throwable e) {
            // Report the failure instead of exiting silently with status 1.
            System.err.println("UserFriendStream terminated abnormally: " + e);
            e.printStackTrace();
            System.exit(1);
        }
        System.exit(0);
    }

    /**
     * Expands one raw line into its {@code userId,friendId} pairs.
     *
     * @param line raw record value, expected as {@code userId,friend1 friend2 ...}; may be null
     * @return one {@code "userId,friendId"} string per friend; empty when the line is null,
     *         does not split into exactly two comma-separated fields, or has a blank
     *         user id or friends field
     */
    private static List<String> toUserFriendPairs(String line) {
        List<String> pairs = new ArrayList<>();
        if (line == null) {
            return pairs;
        }
        String[] fields = line.split(",");
        if (fields.length != 2) {
            return pairs; // dirty data: wrong number of fields
        }
        String userId = fields[0].trim();
        // Trimming first prevents a leading empty token from the whitespace split.
        String friendList = fields[1].trim();
        if (userId.isEmpty() || friendList.isEmpty()) {
            return pairs;
        }
        for (String friendId : WHITESPACE.split(friendList)) {
            pairs.add(userId + "," + friendId);
        }
        return pairs;
    }
}