清洗Kafka数据的API

/**
 * Kafka Streams job that flattens raw "userId,friend friend ..." lines into one
 * record per (user, friend) pair.
 *
 * <p>Reads the {@code user_friends_raw} topic, keeps only values that do not start
 * with {@code "user,"} (presumably the CSV header line) and that split into exactly
 * two comma-separated fields, and writes one null-keyed value per friend to the
 * {@code user_friends} topic.
 */
public class UserFrienf {
    /**
     * Configures, starts and blocks on the streams application until the JVM shuts down.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Properties carries the Kafka Streams configuration.
        Properties prop = new Properties();
        prop.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.174.41:9092");
        prop.put(StreamsConfig.APPLICATION_ID_CONFIG, "kb07");
        prop.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        prop.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        StreamsBuilder builder = new StreamsBuilder();
        builder.stream("user_friends_raw")
                // Null values would otherwise throw a NullPointerException on toString().
                .filter((k, v) -> v != null
                        && !v.toString().startsWith("user,")
                        && v.toString().split(",").length == 2)
                .flatMap((k, v) -> {
                    System.out.println(k + "  " + v);
                    List<KeyValue<String, String>> keyValues = new ArrayList<>();
                    String[] split = v.toString().split(",");   // e.g. ["1", "2 3 4 5 6 7"]
                    String userId = split[0];
                    String[] friends = split[1].split(" ");     // e.g. ["2", "3", "4", "5", "6", "7"]
                    for (String friend : friends) {
                        // Leading or repeated spaces produce empty tokens; skip them so no
                        // record is emitted without a friend id.
                        if (friend.isEmpty()) {
                            continue;
                        }
                        // NOTE(review): the delimiter here is three spaces, while
                        // UserfriendTopology uses a single space and UserFriendhb splits
                        // on a single space - confirm which format consumers of
                        // "user_friends" expect.
                        keyValues.add(new KeyValue<>(null, userId + "   " + friend));
                    }
                    return keyValues;
                })
                .to("user_friends");

        Topology topo = builder.build();
        KafkaStreams streams = new KafkaStreams(topo, prop);
        CountDownLatch countDownLatch = new CountDownLatch(1);

        // Close the streams instance on JVM shutdown and release the waiting main thread.
        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
            streams.close();
            countDownLatch.countDown();
        }, "kb07"));

        streams.start();
        try {
            countDownLatch.await();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the interruption is not silently lost.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
        System.exit(0);
    }
}

Part2

  1. 编写一个接口,封装构建拓扑结构的方法
/**
 * Contract for classes that assemble a Kafka Streams {@code Topology}.
 */
public interface ICustomTopolopy {

    /**
     * Builds the processing topology defined by the implementing class.
     *
     * @return the topology produced by the implementation
     */
    Topology buildCustomToplogy();
}
  2. 编写实现类
/**
 * Topology that flattens raw "userId,friend friend ..." lines into one record per
 * (user, friend) pair.
 *
 * <p>Source topic: {@code user_friends_raw}. Sink topic: {@code user_friends}.
 * Each output record has a null key and the value {@code userId + " " + friend}.
 */
public class UserfriendTopology implements ICustomTopolopy {

    // NOTE(review): the builder is an instance field, so every call to
    // buildCustomToplogy() registers the source and sink on the same builder again;
    // presumably this method is meant to be called once per instance - confirm.
    StreamsBuilder builder = new StreamsBuilder();

    /**
     * Registers the filter/flatMap pipeline on {@link #builder} and builds it.
     *
     * @return the topology built from the instance's {@code StreamsBuilder}
     */
    @Override
    public Topology buildCustomToplogy() {

        KStream<Object, Object> rawLines = builder.stream("user_friends_raw")
                // Null values would otherwise throw a NullPointerException on toString().
                // Values starting with "user," (presumably the CSV header) and lines that
                // do not split into exactly two comma-separated fields are dropped.
                .filter((k, v) -> v != null
                        && !v.toString().startsWith("user,")
                        && v.toString().split(",").length == 2);

        rawLines.flatMap((k, v) -> {
            System.out.println(k + "   " + v);
            List<KeyValue<String, String>> keyValues = new ArrayList<>();
            String[] split = v.toString().split(",");    // e.g. ["1", "2 3 4 5 6 7"]
            String userId = split[0];
            String[] friends = split[1].split(" ");      // e.g. ["2", "3", "4", "5", "6", "7"]
            for (String friend : friends) {
                // Leading or repeated spaces produce empty tokens; emitting them would
                // yield values such as "1 " that carry no friend id.
                if (friend.isEmpty()) {
                    continue;
                }
                keyValues.add(new KeyValue<>(null, userId + " " + friend));
            }
            return keyValues;
        }).to("user_friends");

        return builder.build();
    }
}
  3. 编写公共类(里面封装一些配置信息以及启动、关闭方法)
/**
 * Runs a Kafka Streams application for a topology supplied by an
 * {@link ICustomTopolopy}.
 *
 * <p>Holds the streams configuration and the start/shutdown handling so that
 * individual topologies only need to describe their processing steps.
 */
public class StreamHandler {
    private ICustomTopolopy topology;
    Properties prop = new Properties();

    /**
     * @param topology provider of the topology that {@link #execute()} will run
     */
    public StreamHandler(ICustomTopolopy topology) {
        this.topology = topology;
    }

    /**
     * Configures and starts the streams application, then blocks until the JVM
     * shutdown hook has closed it or the calling thread is interrupted.
     */
    public void execute() {
        prop.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.153.141:9092");
        prop.put(StreamsConfig.APPLICATION_ID_CONFIG, "kb072");
        prop.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        prop.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        Topology topo = this.topology.buildCustomToplogy();
        KafkaStreams streams = new KafkaStreams(topo, prop);
        CountDownLatch countDownLatch = new CountDownLatch(1);

        // Close the streams instance on JVM shutdown and release the waiting caller.
        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
            streams.close();
            countDownLatch.countDown();
        }, "kb07"));

        try {
            streams.start();
            countDownLatch.await();
        } catch (InterruptedException e) {
            // Restore the interrupt flag for the caller and stop the streams instance,
            // which would otherwise keep running after this method returns.
            Thread.currentThread().interrupt();
            e.printStackTrace();
            streams.close();
        }
    }
}
  4. main方法
/**
 * Entry point that runs the user-friend topology through {@code StreamHandler}.
 */
public class StreamDriver {
    /**
     * Wires a {@code UserfriendTopology} into a {@code StreamHandler} and executes it.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        new StreamHandler(new UserfriendTopology()).execute();
    }
}

将清洗好的数据传输到HBase中

因为Kafka中的消息默认只保留七天(log.retention.hours=168),过期后会被删除,所以需要把清洗好的数据持久化到HBase中

/**
 * Copies records from the {@code user_friends5} Kafka topic into the HBase table
 * {@code b:user_friend}.
 *
 * <p>Each record value is split on a single space into a user id and a friend id,
 * which are stored in column family {@code uf} under the qualifiers {@code userid}
 * and {@code friendid}.
 */
public class UserFriendhb {
    /**
     * Polls Kafka in an endless loop and writes every batch to HBase.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Kafka consumer settings
        Properties prop = new Properties();
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.174.41:9092");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000");
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        prop.put(ConsumerConfig.GROUP_ID_CONFIG, "aaa");

        // HBase client settings
        Configuration config = HBaseConfiguration.create();
        config.set("hbase.rootdir", "hdfs://192.168.174.41:9000/hbase");
        config.set("hbase.zookeeper.quorum", "192.168.174.41");
        config.set("hbase.zookeeper.property.clientPort", "2181");

        // Bytes.toBytes encodes as UTF-8, unlike String.getBytes(), which depends on
        // the platform default charset.
        byte[] family = Bytes.toBytes("uf");
        byte[] userIdQualifier = Bytes.toBytes("userid");
        byte[] friendIdQualifier = Bytes.toBytes("friendid");

        // try-with-resources closes the consumer, connection and table when the loop
        // is left through an exception. Closing the table inside the loop would make
        // every batch after the first one fail.
        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(prop);
             Connection connection = ConnectionFactory.createConnection(config);
             Table table = connection.getTable(TableName.valueOf("b:user_friend"))) {

            consumer.subscribe(Collections.singletonList("user_friends5"));

            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(100);
                // A fresh list per poll so that already-written puts are not resent.
                List<Put> putList = new ArrayList<>();
                for (ConsumerRecord<String, String> record : records) {
                    System.out.println(record);
                    String value = record.value();
                    String[] infos = value == null ? new String[0] : value.split(" ");
                    // Skip values that do not contain both a user id and a friend id.
                    if (infos.length < 2) {
                        continue;
                    }

                    // NOTE(review): the row key is the 32-bit hash of the concatenated
                    // ids, so distinct pairs can collide and overwrite each other;
                    // kept as-is because changing it alters the table's key layout.
                    Put put = new Put(Bytes.toBytes((infos[0] + infos[1]).hashCode()));
                    put.addColumn(family, userIdQualifier, Bytes.toBytes(infos[0]));
                    put.addColumn(family, friendIdQualifier, Bytes.toBytes(infos[1]));

                    putList.add(put);
                    System.out.println("--------------");
                }
                if (!putList.isEmpty()) {
                    table.put(putList);
                }
                // Auto-commit is disabled above, so offsets are committed explicitly,
                // and only after the batch has been written to HBase.
                if (!records.isEmpty()) {
                    consumer.commitSync();
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值