一 Kafka数据格式
403813272,3621115689
403813272,1099977298
403813272,1470696976
403813272,325978978
403813272,2429535244
403813272,3934248982
403813272,3972188036
403813272,318125731
403813272,3418280204
403813272,3741420925
403813272,4076174762
403813272,106377924
403813272,3213173897
1629404827,962342303
1629404827,262347297
1629404827,1655332036
1629404827,1544374715
1629404827,2085945544
1629404827,1596477432
1629404827,1788879535
1629404827,3745954474
........
二 MongoDB建表语句
// Switch to (creating, if absent) the target database.
use events_db
// Create the user_friend collection with a JSON-schema validator:
// every document must contain both ids, and both are stored as strings
// (the Kafka payload is comma-separated text and is never parsed to numbers).
db.createCollection("user_friend", {
validator: {
$jsonSchema: {
bsonType: "object",
required: [ "user_id", "friend_id" ],
properties: {
user_id: {
bsonType : "string",
description: "the identifier of a user"
},
friend_id: {
bsonType : "string",
description: "the identifier of the friend"
}
}
}
}
})
// Compound index to speed up lookups by user and by (user, friend) pair.
// NOTE(review): not declared unique, so duplicate pairs can be inserted — confirm intent.
db.user_friend.createIndex({ user_id: 1, friend_id: 1 })
三 将Kafka数据传入到MongoDB中
package my.test.Kafka_To_MongoDB;
import com.mongodb.MongoClient;
import com.mongodb.MongoCredential;
import com.mongodb.ServerAddress;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.bson.Document;
/**
 * Consumes "user_id,friend_id" CSV lines from the Kafka topic
 * {@code user_friends} and bulk-inserts them as documents into the
 * MongoDB collection {@code events_db.user_friend}.
 *
 * <p>Fixes over the original: offsets are now committed after each
 * successful insert (auto-commit is disabled, so the original silently
 * reprocessed the whole topic on every restart), the consumer and Mongo
 * client are closed via try-with-resources instead of leaking on
 * {@code System.exit}, the deprecated {@code poll(long)} overload is
 * replaced, and malformed records no longer throw
 * {@code ArrayIndexOutOfBoundsException}.
 */
public class UserFriend {
    public static void main(String[] args) {
        Properties prop = new Properties();
        // Use ConsumerConfig for consumer properties (the original used
        // ProducerConfig.BOOTSTRAP_SERVERS_CONFIG; same key string, wrong class).
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.226.111:9092");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000");
        // Manual offset management: commit only after MongoDB accepted the batch.
        // (AUTO_COMMIT_INTERVAL_MS was dropped — it is a no-op with auto-commit off.)
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        prop.put(ConsumerConfig.GROUP_ID_CONFIG, "wang1");

        ServerAddress serverAddress = new ServerAddress("192.168.226.111", 27017);
        MongoCredential credential =
                MongoCredential.createScramSha1Credential("wang", "events_db", "ok".toCharArray());

        // try-with-resources closes both clients on normal exit or exception;
        // the original leaked them via System.exit(1).
        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(prop);
             MongoClient mongoClient = new MongoClient(
                     Collections.singletonList(serverAddress),
                     Collections.singletonList(credential))) {

            consumer.subscribe(Collections.singletonList("user_friends"));
            MongoCollection<Document> userFriendCol =
                    mongoClient.getDatabase("events_db").getCollection("user_friend");

            List<Document> docs = new ArrayList<>();
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
                if (records.isEmpty()) {
                    // Topic drained (matches the original's stop-on-empty-poll
                    // behavior), but exit cleanly instead of System.exit(1).
                    break;
                }
                docs.clear();
                for (ConsumerRecord<String, String> record : records) {
                    System.out.println(record.value());
                    String[] fields = record.value().split(",", -1);
                    if (fields.length < 2) {
                        // Guard: a malformed line used to throw AIOOBE and kill the job.
                        System.err.println("skipping malformed record: " + record.value());
                        continue;
                    }
                    docs.add(new Document()
                            .append("user_id", fields[0])
                            .append("friend_id", fields[1]));
                }
                if (!docs.isEmpty()) {
                    userFriendCol.insertMany(docs);
                }
                // Commit only after the batch is safely in MongoDB.
                consumer.commitSync();
            }
        }
    }
}