Create a Maven project named kafkatomongodb.
Add the following dependencies to the pom.xml file:
<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
</properties>

<dependencies>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.11</version>
        <scope>test</scope>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka -->
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka_2.11</artifactId>
        <version>2.0.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients -->
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
        <version>2.0.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.mongodb/mongo-java-driver -->
    <dependency>
        <groupId>org.mongodb</groupId>
        <artifactId>mongo-java-driver</artifactId>
        <version>3.2.2</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.mongodb.mongo-hadoop/mongo-hadoop-core -->
    <dependency>
        <groupId>org.mongodb.mongo-hadoop</groupId>
        <artifactId>mongo-hadoop-core</artifactId>
        <version>2.0.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.mongodb.mongo-hadoop/mongo-hadoop-hive -->
    <dependency>
        <groupId>org.mongodb.mongo-hadoop</groupId>
        <artifactId>mongo-hadoop-hive</artifactId>
        <version>2.0.0</version>
    </dependency>
</dependencies>
The mapping between Hive and MongoDB set up later requires three of the jars above: mongo-java-driver.jar, mongo-hadoop-core.jar and mongo-hadoop-hive.jar. Copy them into /opt/software/hive110/lib and set their permissions to 777:
[root@single lib]# chmod 777 mongo-*
The three jars can be downloaded from: https://mvnrepository.com/search?q=mongodb
Create Kafka_To_Mongodb.java:
package cn.alisa;

import com.mongodb.MongoClient;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.bson.Document;

import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;

public class Kafka_To_Mongodb {
    public static void main(String[] args) {
        // Consumer configuration
        Properties prop = new Properties();
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.21.130:9092");
        prop.put(ConsumerConfig.GROUP_ID_CONFIG, "alisa");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        KafkaConsumer<String, String> kafkaConsumer = new KafkaConsumer<>(prop);
        kafkaConsumer.subscribe(Collections.singletonList("kafkamongo"));

        // Connect to the MongoDB server
        MongoClient mongoClient = new MongoClient("192.168.21.200", 27017);
        MongoDatabase db = mongoClient.getDatabase("kafkamongo");
        MongoCollection<Document> users = db.getCollection("users");

        List<Document> docs = new ArrayList<>();
        while (true) {
            // Fetch the next batch of records from the Kafka cluster
            ConsumerRecords<String, String> records = kafkaConsumer.poll(Duration.ofMillis(1000));
            // Clear the documents from the previous batch (by default a poll returns up to 500 records, adjustable via max.poll.records)
            docs.clear();
            // If the batch is not empty, convert each record into a Document and insert the batch;
            // otherwise fall through to the else branch and wait
            if (!records.isEmpty()) {
                for (ConsumerRecord<String, String> record : records) {
                    // System.out.println(record.value());
                    Document document = new Document();
                    String[] result = record.value().split(",", -1);
                    document.append("user_id", result[0]).append("locale", result[1])
                            .append("birthyear", result[2]).append("gender", result[3])
                            .append("joinedAt", result[4]).append("location", result[5])
                            .append("timezone", result[6]);
                    docs.add(document);
                }
                users.insertMany(docs);
            } else {
                System.out.println("Thread is sleeping");
                // This loop simulates near-real-time processing; a production setup would use multiple threads instead
                try {
                    // When no records arrive, sleep for 3 seconds and then poll again;
                    // if new data has arrived it is converted to Documents above, otherwise keep waiting
                    Thread.sleep(3000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
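
To verify the pipeline end to end, a small test producer can push one record into the kafkamongo topic while Kafka_To_Mongodb is running. The following is only a sketch: the class name MongoTestProducer and the sample field values are made up for illustration, while the broker address, topic name, and the seven comma-separated fields match the consumer code above.

package cn.alisa;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.Properties;

public class MongoTestProducer {
    public static void main(String[] args) {
        // Producer configuration, reusing the broker address from the consumer above
        Properties prop = new Properties();
        prop.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.21.130:9092");
        prop.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        prop.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());

        KafkaProducer<String, String> producer = new KafkaProducer<>(prop);
        // A made-up sample record with the seven comma-separated fields the consumer expects:
        // user_id,locale,birthyear,gender,joinedAt,location,timezone
        String line = "1001,en_US,1990,male,2012-10-02T06:40:55.524Z,Shanghai,480";
        producer.send(new ProducerRecord<>("kafkamongo", line));
        producer.flush();
        producer.close();
    }
}

After the producer runs, the record should appear as a document in the users collection of the kafkamongo database in MongoDB.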