Importing Kafka Data into MongoDB

Create a Maven project named kafkatomongodb.

Update the dependencies in pom.xml:

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka -->
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka_2.11</artifactId>
      <version>2.0.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients -->
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka-clients</artifactId>
      <version>2.0.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.mongodb/mongo-java-driver -->
    <dependency>
      <groupId>org.mongodb</groupId>
      <artifactId>mongo-java-driver</artifactId>
      <version>3.2.2</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.mongodb.mongo-hadoop/mongo-hadoop-core -->
    <dependency>
      <groupId>org.mongodb.mongo-hadoop</groupId>
      <artifactId>mongo-hadoop-core</artifactId>
      <version>2.0.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.mongodb.mongo-hadoop/mongo-hadoop-hive -->
    <dependency>
      <groupId>org.mongodb.mongo-hadoop</groupId>
      <artifactId>mongo-hadoop-hive</artifactId>
      <version>2.0.0</version>
    </dependency>

  </dependencies>

The Hive-to-MongoDB mapping in a later step needs three of the jars above: mongo-java-driver.jar, mongo-hadoop-core.jar, and mongo-hadoop-hive.jar. Copy them into /opt/software/hive110/lib and give them 777 permissions:
[root@single lib]# chmod 777 mongo-*
All three jars can be downloaded from https://mvnrepository.com/search?q=mongodb

Create Kafka_To_Mongodb.java:

package cn.alisa;

import com.mongodb.MongoClient;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.bson.Document;

import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;

public class Kafka_To_Mongodb {
    public static void main(String[] args) {
        //Consumer configuration
        Properties prop = new Properties();
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"192.168.21.130:9092");
        prop.put(ConsumerConfig.GROUP_ID_CONFIG,"alisa");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getTypeName());
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,StringDeserializer.class.getTypeName());
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");
        KafkaConsumer<String,String> kafkaConsumer = new KafkaConsumer<>(prop);
        kafkaConsumer.subscribe(Collections.singletonList("kafkamongo"));

        //Connect to the MongoDB server and get the target database and collection
        MongoClient mongoClient = new MongoClient("192.168.21.200", 27017);
        MongoDatabase db = mongoClient.getDatabase("kafkamongo");
        MongoCollection<Document> users = db.getCollection("users");
        //Reusable buffer for the documents of one batch
        List<Document> docs = new ArrayList<>();
        while (true){
            //poll() fetches the next batch of records from the Kafka cluster,
            //waiting up to one second if none are available yet
            ConsumerRecords<String,String> records = kafkaConsumer.poll(Duration.ofMillis(1000));
            //Clear the documents buffered from the previous batch
            //(one poll returns at most max.poll.records records, 500 by default)
            docs.clear();
            //If the batch is non-empty, convert each record into a Document;
            //otherwise print a message, sleep, and poll again
            if (!records.isEmpty()){
                for (ConsumerRecord<String,String> record : records) {
//                    System.out.println(record.value());
                    Document document = new Document();
                    String[] result = record.value().split(",", -1);
                    //Defensive check: skip malformed lines that lack all seven fields
                    if (result.length < 7) {
                        continue;
                    }
                    document.append("user_id",result[0]).append("locale",result[1])
                            .append("birthyear",result[2]).append("gender",result[3])
                            .append("joinedAt",result[4]).append("location",result[5])
                            .append("timezone",result[6]);
                    docs.add(document);
                }
                users.insertMany(docs);
            }else {
                System.out.println("Thread is sleeping");
                //The sleep below simulates near-real-time polling; a real
                //deployment would typically handle this with multiple threads
                try {
                    //When no records arrive, wait 3 seconds and poll again;
                    //if new data has appeared, the loop above converts it to Documents
                    Thread.sleep(3000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        }

    }
}
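
To test the pipeline end to end, the kafkamongo topic needs comma-separated records with the seven fields the consumer splits out. Below is a minimal producer sketch; the broker address and topic name are taken from the consumer above, while the class name UserProducerDemo and the sample record are made up for illustration.

package cn.alisa;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.Properties;

public class UserProducerDemo {
    public static void main(String[] args) {
        //Producer configuration; the broker address matches the consumer above
        Properties prop = new Properties();
        prop.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.21.130:9092");
        prop.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        prop.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());

        KafkaProducer<String, String> producer = new KafkaProducer<>(prop);
        //A made-up sample record with the seven comma-separated fields:
        //user_id,locale,birthyear,gender,joinedAt,location,timezone
        String line = "1001,en_US,1990,male,2023-01-01T00:00:00.000Z,Beijing,480";
        producer.send(new ProducerRecord<>("kafkamongo", line));
        producer.flush();
        producer.close();
    }
}

After running this producer and then Kafka_To_Mongodb, the record should show up as a document in the users collection of the kafkamongo database.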

 
