public class UserFrienf {
    public static void main(String[] args) {
        // Kafka Streams configuration for this application instance.
        Properties prop = new Properties();
        prop.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.174.41:9092");
        prop.put(StreamsConfig.APPLICATION_ID_CONFIG, "kb07");
        prop.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        prop.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        StreamsBuilder builder = new StreamsBuilder();
        // Input record value format: "userId,friend1 friend2 friend3 ..."
        builder.stream("user_friends_raw")
                // Drop the CSV header row ("user,...") and any malformed record
                // that does not split into exactly [userId, friendList].
                // Null guard added: a null value would NPE on v.toString().
                .filter((k, v) -> v != null
                        && !v.toString().startsWith("user,")
                        && v.toString().split(",").length == 2)
                // Fan out one "userId friendId" record per friend.
                .flatMap((k, v) -> {
                    List<KeyValue<String, String>> keyValues = new ArrayList<>();
                    String[] split = v.toString().split(",");  // ["userId", "f1 f2 f3 ..."]
                    String userId = split[0];
                    String[] friends = split[1].split(" ");
                    for (String friend : friends) {
                        // Key is null: downstream partitioning does not rely on it.
                        keyValues.add(new KeyValue<>(null, userId + " " + friend));
                    }
                    return keyValues;
                })
                .to("user_friends");

        Topology topo = builder.build();
        KafkaStreams streams = new KafkaStreams(topo, prop);
        CountDownLatch countDownLatch = new CountDownLatch(1);
        // Close the streams app cleanly on JVM shutdown (Ctrl+C / SIGTERM),
        // then release the main thread blocked on the latch.
        Runtime.getRuntime().addShutdownHook(new Thread("kb07") {
            @Override
            public void run() {
                streams.close();
                countDownLatch.countDown();
            }
        });
        streams.start();
        try {
            countDownLatch.await();
        } catch (InterruptedException e) {
            // Restore the interrupt flag instead of swallowing the exception.
            Thread.currentThread().interrupt();
        }
        System.exit(0);
    }
}
Part2
- 编写一个接口,封装构建拓扑结构的方法
/**
 * Abstraction over building a Kafka Streams {@link Topology}, so that the
 * stream wiring can be supplied independently of the driver/handler code.
 * (Note: interface and method names are kept as-is for existing callers.)
 */
public interface ICustomTopolopy {
    /** Returns the fully assembled topology to run. */
    Topology buildCustomToplogy();
}
- 编写实现类
public class UserfriendTopology implements ICustomTopolopy {

    /**
     * Builds the topology: reads "user_friends_raw", drops the header row and
     * malformed records, explodes "userId,f1 f2 f3" into one "userId friendId"
     * record per friend, and writes the results to "user_friends".
     */
    @Override
    public Topology buildCustomToplogy() {
        // Builder is method-local on purpose: the original field-level builder
        // would accumulate sources/sinks across repeated calls and yield a
        // corrupted topology the second time this method is invoked.
        StreamsBuilder builder = new StreamsBuilder();
        builder.stream("user_friends_raw")
                // Keep only non-null values that are not the CSV header and
                // split into exactly [userId, friendList].
                .filter((k, v) -> v != null
                        && !v.toString().startsWith("user,")
                        && v.toString().split(",").length == 2)
                .flatMap((k, v) -> {
                    List<KeyValue<String, String>> keyValues = new ArrayList<>();
                    String[] parts = v.toString().split(",");  // ["userId", "f1 f2 ..."]
                    String userId = parts[0];
                    for (String friend : parts[1].split(" ")) {
                        keyValues.add(new KeyValue<>(null, userId + " " + friend));
                    }
                    return keyValues;
                })
                .to("user_friends");
        return builder.build();
    }
}
- 编写公共类(里面封装一些配置文件和启动关闭方法)
/**
 * Generic runner: holds the shared Kafka Streams configuration and the
 * start/await/close lifecycle; the concrete topology is injected.
 */
public class StreamHandler {
    // Strategy object supplying the topology to run.
    private final ICustomTopolopy topology;
    private final Properties prop = new Properties();

    public StreamHandler(ICustomTopolopy topology) {
        this.topology = topology;
    }

    /**
     * Configures and starts a KafkaStreams instance built from the injected
     * topology, then blocks until a JVM shutdown hook closes the streams app.
     */
    public void execute() {
        prop.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.153.141:9092");
        prop.put(StreamsConfig.APPLICATION_ID_CONFIG, "kb072");
        prop.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        prop.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        Topology topo = this.topology.buildCustomToplogy();
        KafkaStreams streams = new KafkaStreams(topo, prop);
        CountDownLatch countDownLatch = new CountDownLatch(1);
        // Close cleanly on JVM shutdown, then release the blocked caller.
        Runtime.getRuntime().addShutdownHook(new Thread("kb07") {
            @Override
            public void run() {
                streams.close();
                countDownLatch.countDown();
            }
        });
        // start() does not throw InterruptedException; keep it out of the try.
        streams.start();
        try {
            countDownLatch.await();
        } catch (InterruptedException e) {
            // Restore the interrupt flag instead of swallowing the exception.
            Thread.currentThread().interrupt();
        }
    }
}
- main方法
/** Entry point: wires the user-friends topology into the generic stream runner. */
public class StreamDriver {
    public static void main(String[] args) {
        ICustomTopolopy userFriendsTopology = new UserfriendTopology();
        new StreamHandler(userFriendsTopology).execute();
    }
}
将清洗好的数据传输到 HBase 中进行持久化存储,
因为 Kafka 中的数据默认只保留七天,超过保留期就会被删除。
public class UserFriendhb {
    /**
     * Consumes cleaned "userId friendId" records from Kafka and persists them
     * into HBase table "b:user_friend" (column family "uf"). HBase is the
     * durable store since Kafka retention is time-limited.
     */
    public static void main(String[] args) {
        // Kafka consumer configuration.
        Properties prop = new Properties();
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.174.41:9092");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000");
        // Manual offset management: commit only after HBase write succeeds.
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        prop.put(ConsumerConfig.GROUP_ID_CONFIG, "aaa");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(prop);
        consumer.subscribe(Collections.singletonList("user_friends5"));

        // HBase connection configuration.
        Configuration config = HBaseConfiguration.create();
        config.set("hbase.rootdir", "hdfs://192.168.174.41:9000/hbase");
        config.set("hbase.zookeeper.quorum", "192.168.174.41");
        config.set("hbase.zookeeper.property.clientPort", "2181");

        // try-with-resources: the original called table.close() INSIDE the
        // poll loop, so every put after the first batch hit a closed table,
        // and the Connection was never closed at all.
        try (Connection connection = ConnectionFactory.createConnection(config);
             Table table = connection.getTable(TableName.valueOf("b:user_friend"))) {
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(100);
                // Fresh batch per poll — must be created inside the loop.
                List<Put> putList = new ArrayList<>();
                for (ConsumerRecord<String, String> record : records) {
                    String[] infos = record.value().split(" ");  // [userId, friendId]
                    // Row key: int hashCode of userId+friendId.
                    // NOTE(review): hashCode can collide and silently overwrite
                    // rows — consider using the concatenated ids as the key.
                    Put put = new Put(Bytes.toBytes((infos[0] + infos[1]).hashCode()));
                    put.addColumn("uf".getBytes(), "userid".getBytes(), infos[0].getBytes());
                    put.addColumn("uf".getBytes(), "friendid".getBytes(), infos[1].getBytes());
                    putList.add(put);
                }
                if (!putList.isEmpty()) {
                    table.put(putList);
                    // Auto-commit is disabled: commit offsets only once the
                    // batch is safely in HBase (at-least-once delivery).
                    consumer.commitSync();
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}