Project 3

Flume Collection

Deploy a Flume agent to monitor the log files produced by the application and forward them to the Kafka cluster. The agent is named test (every property key below carries that prefix) and wires a spooling-directory source s1 through a file channel c1 into a Kafka sink k1:

test.sources = s1
test.channels = c1
test.sinks = k1

test.sources.s1.type = spooldir
test.sources.s1.spoolDir = /opt/kb07file/flumeFile/test
test.sources.s1.deserializer = LINE
test.sources.s1.deserializer.maxLineLength = 60000
test.sources.s1.includePattern = test_[0-9]{4}-[0-9]{2}-[0-9]{2}\.csv

test.channels.c1.type = file
test.channels.c1.checkpointDir = /opt/kb07file/flumeFile/checkpoint/test
test.channels.c1.dataDir = /opt/kb07file/flumeFile/data/test

test.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
test.sinks.k1.batchSize = 640
test.sinks.k1.brokerList = 192.168.174.41:9092
test.sinks.k1.topic = test

test.sources.s1.channels = c1
test.sinks.k1.channel = c1
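
With the configuration saved to a file, the agent can be started with flume-ng; the file name test.conf and the conf directory below are assumptions, so adjust them to your layout. Creating the sink topic up front also avoids depending on broker auto-creation (on Kafka versions before 2.2, use --zookeeper 192.168.174.41:2181 instead of --bootstrap-server):

# --name must match the property prefix "test"; test.conf is an assumed file name
flume-ng agent --name test --conf ./conf --conf-file ./conf/test.conf -Dflume.root.logger=INFO,console

# create the sink topic (partition/replication counts are illustrative)
kafka-topics.sh --create --bootstrap-server 192.168.174.41:9092 --topic test --partitions 1 --replication-factor 1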

Cleansing in Kafka (Kafka Streams)

The raw records in user_friends_raw have the form userId,friend1 friend2 friend3 ... (one user per line, friends separated by spaces). The Streams job below drops the header row and malformed lines, then explodes each record into one (userId, friendId) message per friend and writes the result to the user_friends topic.

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;

import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;

public class UserFriend {
    public static void main(String[] args) {
        // Properties holds the Kafka Streams configuration
        Properties prop = new Properties();
        prop.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.174.41:9092");
        prop.put(StreamsConfig.APPLICATION_ID_CONFIG, "kb07");
        prop.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        prop.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        StreamsBuilder builder = new StreamsBuilder();
        builder.stream("user_friends_raw")
                // drop the CSV header and any line that does not split into exactly "userId,friends"
                .filter((k, v) -> (!v.toString().startsWith("user,") && v.toString().split(",").length == 2))
                .flatMap((k, v) -> {                               // e.g. "1,2 3 4 5 6 7"
                    System.out.println(k + "  " + v);
                    List<KeyValue<String, String>> keyValues = new ArrayList<>();
                    String[] split = v.toString().split(",");      // ["1", "2 3 4 5 6 7"]
                    String userId = split[0];                      // "1"
                    String[] friends = split[1].split(" ");        // ["2", "3", "4", "5", "6", "7"]
                    // one output record per (userId, friendId) pair; a single space
                    // is used as the separator so the downstream consumer can split on " "
                    for (String friend : friends) {
                        keyValues.add(new KeyValue<>(null, userId + " " + friend));
                    }
                    return keyValues;
                })
                .to("user_friends");

        Topology topo = builder.build();
        KafkaStreams streams = new KafkaStreams(topo, prop);
        // keep main() alive until the JVM shuts down, then close the topology cleanly
        CountDownLatch countDownLatch = new CountDownLatch(1);
        Runtime.getRuntime().addShutdownHook(new Thread("kb07") {
            @Override
            public void run() {
                streams.close();
                countDownLatch.countDown();
            }
        });
        streams.start();
        try {
            countDownLatch.await();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        System.exit(0);
    }
}
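
To spot-check the cleaned output while the Streams job is running, a console consumer on the output topic works (assuming the Kafka CLI tools are on the PATH):

kafka-console-consumer.sh --bootstrap-server 192.168.174.41:9092 --topic user_friends --from-beginning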

Persisting the Data into HBase

A plain Kafka consumer reads the cleaned (userId, friendId) pairs from Kafka and batch-writes them into the HBase table b:user_friend.
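
The namespace and table must exist before the consumer starts; a minimal HBase-shell sketch, with the namespace b and column family uf matching the names used in the code:

create_namespace 'b'
create 'b:user_friend', 'uf'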

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.io.IOException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;

public class UserFriendhb {
    public static void main(String[] args) {
        // Kafka consumer configuration
        Properties prop = new Properties();
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.174.41:9092");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000");
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        prop.put(ConsumerConfig.GROUP_ID_CONFIG, "aaa");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(prop);
        // must match the output topic of the Streams job above
        consumer.subscribe(Collections.singletonList("user_friends"));

        // HBase connection configuration
        Configuration config = HBaseConfiguration.create();
        config.set("hbase.rootdir", "hdfs://192.168.174.41:9000/hbase");
        config.set("hbase.zookeeper.quorum", "192.168.174.41");
        config.set("hbase.zookeeper.property.clientPort", "2181");

        try {
            Connection connection = ConnectionFactory.createConnection(config);
            Table table = connection.getTable(TableName.valueOf("b:user_friend"));

            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
                // the Put list must be created inside the loop: one batch per poll
                List<Put> putList = new ArrayList<>();
                for (ConsumerRecord<String, String> record : records) {
                    System.out.println(record);
                    String[] infos = record.value().split(" ");

                    // row key: hash of userId+friendId, so re-consuming the same
                    // pair overwrites the same row instead of duplicating it
                    Put put = new Put(Bytes.toBytes((infos[0] + infos[1]).hashCode()));
                    put.addColumn("uf".getBytes(), "userid".getBytes(), infos[0].getBytes());
                    put.addColumn("uf".getBytes(), "friendid".getBytes(), infos[1].getBytes());
                    putList.add(put);
                }
                table.put(putList);
                // commit offsets only after the batch is safely in HBase;
                // do not close the table inside the loop, or the next
                // iteration's put() would fail on a closed table
                consumer.commitSync();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
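
The writes can be spot-checked from the HBase shell:

scan 'b:user_friend', {LIMIT => 5}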

Mapping the HBase Table into Hive

Hive's HBase storage handler exposes an existing HBase table (here events_db:users) as an external table; hbase.columns.mapping pairs each Hive column, in order, with :key or a column-family:qualifier. This requires the hive-hbase-handler jar and a reachable ZooKeeper quorum on the Hive side.

create external table eventskb07.hb_user(
row_key string,
locale string,
birth_year string,
gender string,
joinedAt string,
location string,
timezone string)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties ('hbase.columns.mapping'=':key,region:locale,profile:birth_year,profile:gender,registration:joinedAt,region:location,region:timezone')
tblproperties ('hbase.table.name' = 'events_db:users');
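
A quick sanity check that the mapping resolves (this scans HBase live through the handler):

select * from eventskb07.hb_user limit 5;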
      
      
Because every query through the storage handler scans HBase, the data is then snapshotted into a native ORC table (user is backquoted since it is a reserved word in recent Hive versions):

create table eventskb07.`user`
stored as orc as
select * from eventskb07.hb_user;