---------------------test class: kafkaToMango2 ------------------------------
package _20210531.oop.kafkatoHbaseAndMongo.oop;
import _20210531.oop.kafkatoHbaseAndMongo.oop.mongohandler.TrainHandlerMongo;
import _20210531.oop.kafkatoHbaseAndMongo.oop.worker.Worker;
import _20210531.oop.kafkatoHbaseAndMongo.oop.write.MongoDBWriter;
/**
 * @Author Xulihua
 * @Date 2021/5/31
 * @Description Entry point: wires the Mongo handler, writer, and worker together, then starts consuming
 */
public class kafkaToMango2 {
    public static void main(String[] args) {
        // Handler that parses raw Kafka records into MongoDB documents
        TrainHandlerMongo trainHandlerMongo = new TrainHandlerMongo();
        // Writer that persists the parsed documents into MongoDB
        MongoDBWriter mongoDBWriter = new MongoDBWriter(trainHandlerMongo);
        // Consume topic "train" as consumer group "trainmg2"
        Worker worker1 = new Worker("train", "trainmg2", mongoDBWriter);
        // Write into the MongoDB collection named "train"
        worker1.fillData("train");
    }
}
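
The IWorker, IWriter, and IParseRecordMongo interfaces are not shown in this post. Below is a minimal sketch (three separate files) of declarations consistent with how the classes in this post implement them; the originals may differ in detail:

package _20210531.oop.kafkatoHbaseAndMongo.oop.worker;
public interface IWorker {
    // Keep pulling records from Kafka and hand each batch to a writer
    void fillData(String targetName);
}

package _20210531.oop.kafkatoHbaseAndMongo.oop.write;
import org.apache.kafka.clients.consumer.ConsumerRecords;
public interface IWriter {
    // Write one batch to the target table/collection; return how many records were written
    int write(String targetName, ConsumerRecords<String, String> records);
}

package _20210531.oop.kafkatoHbaseAndMongo.oop.mongohandler;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.bson.Document;
import java.util.List;
public interface IParseRecordMongo {
    // Turn one batch of Kafka records into MongoDB documents
    List<Document> parse(ConsumerRecords<String, String> records);
}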
---------------------Worker class-------------------------------
package _20210531.oop.kafkatoHbaseAndMongo.oop.worker;
import _20210531.oop.kafkatoHbaseAndMongo.oop.write.IWriter;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import java.util.Collections;
import java.util.Properties;
/**
 * @Author Xulihua
 * @Date 2021/6/1
 * @Description Configures the Kafka consumer and drives the write loop
 */
public class Worker implements IWorker {
    KafkaConsumer<String, String> consumer;
    IWriter writer;

    public Worker(String topicName, String groupId, IWriter writer) {
        this.writer = writer;
        final Properties properties = new Properties();
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.107.103:9092");
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        properties.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000");
        // properties.put(ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG, "1000"); // heartbeat interval
        // Whether offsets are committed automatically
        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        // Only takes effect when auto-commit is enabled
        properties.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 1000);
        properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        // Switching to a new group name makes the consumer re-read the topic from the
        // beginning, even when auto-commit is set to true
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
        consumer = new KafkaConsumer<String, String>(properties);
        consumer.subscribe(Collections.singleton(topicName));
    }

    @Override
    public void fillData(String targetName) {
        int sum = 0;
        while (true) {
            // Poll for up to 100 ms (the long overload is deprecated in newer clients)
            ConsumerRecords<String, String> records = consumer.poll(100);
            int count = writer.write(targetName, records);
            sum += count;
            System.out.println("Records processed: " + sum);
            try {
                Thread.sleep(100);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
}
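
Note that enable.auto.commit is set to false above but fillData never commits offsets, so a restart of the worker re-reads the topic from the earliest offset. One way to avoid reprocessing (a sketch, not part of the original code, assuming a Kafka client that supports commitSync) is to commit manually once a batch has been written:

// Inside the fillData loop, after a successful write:
ConsumerRecords<String, String> records = consumer.poll(100);
int count = writer.write(targetName, records);
if (count > 0) {
    consumer.commitSync(); // mark this batch as processed for the consumer group
}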
--------------------------writer class---------------------------
package _20210531.oop.kafkatoHbaseAndMongo.oop.write;
import _20210531.oop.kafkatoHbaseAndMongo.oop.mongohandler.IParseRecordMongo;
import com.mongodb.MongoClient;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.bson.Document;
import java.util.List;
/**
 * @Author Xulihua
 * @Date 2021/6/1
 * @Description Configures MongoDB; the concrete logic for writing the data read from
 *              Kafka into a MongoDB collection lives in this class
 */
public class MongoDBWriter implements IWriter {
    private IParseRecordMongo handler;
    private MongoDatabase db;

    public MongoDBWriter(IParseRecordMongo handler) {
        this.handler = handler;
        String mongoHost = "192.168.107.103";
        int mongoPort = 27017;
        MongoClient mongoClient = new MongoClient(mongoHost, mongoPort);
        db = mongoClient.getDatabase("kgcdsj");
    }

    @Override
    public int write(String tableName, ConsumerRecords<String, String> records) {
        MongoCollection<Document> table = db.getCollection(tableName);
        // Parse the batch into documents, then insert them all at once
        List<Document> datas = handler.parse(records);
        if (datas != null && !datas.isEmpty()) {
            table.insertMany(datas);
            return datas.size();
        }
        return 0;
    }
}
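
insertMany is ordered by default, so a single duplicate-key error (like the E11000 shown below) aborts the rest of the batch. If duplicates should be skipped instead, one option (a sketch assuming the MongoDB 3.x Java driver, not the original code) is an unordered insert that counts only the documents that made it in:

import com.mongodb.MongoBulkWriteException;
import com.mongodb.client.model.InsertManyOptions;

try {
    // ordered(false): Mongo keeps inserting past individual duplicate-key failures
    table.insertMany(datas, new InsertManyOptions().ordered(false));
    return datas.size();
} catch (MongoBulkWriteException e) {
    // Count only the documents that were actually inserted
    return e.getWriteResult().getInsertedCount();
}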
------------------------handler class--------------------------------
package _20210531.oop.kafkatoHbaseAndMongo.oop.mongohandler;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.bson.Document;
import java.util.ArrayList;
import java.util.List;
/**
 * @Author Xulihua
 * @Date 2021/6/4
 * @Description Parses the records read from Kafka into MongoDB documents
 */
public class TrainHandlerMongo implements IParseRecordMongo {
    @Override
    public List<Document> parse(ConsumerRecords<String, String> records) {
        // A fresh list per call: documents must NOT accumulate across batches
        // (see the duplicate-key error discussed below)
        List<Document> list = new ArrayList<>();
        for (ConsumerRecord<String, String> record : records) {
            System.out.println(record.value());
            Document document = new Document();
            String[] split = record.value().split(",");
            // Random suffix makes the id effectively unique across records
            double random = Math.random();
            document.append("id", (split[0] + split[1] + random));
            document.append("user", split[0]);
            document.append("event", split[1]);
            document.append("invited", split[2]);
            document.append("time_stamp", split[3]);
            document.append("interested", split[4]);
            document.append("not_interested", split[5]);
            list.add(document);
        }
        return list;
    }
}
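
From the split indices above, each Kafka message is assumed to be one comma-separated line with six fields (the field names come from the code; the sample values below are made up for illustration):

user,event,invited,time_stamp,interested,not_interested
1018419,2089569761,0,2012-10-11 02:05:22,1,0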
Exception in thread "main" com.mongodb.MongoBulkWriteException: Bulk write operation error on server 192.168.107.103:27017. Write errors: [BulkWriteError{index=0, code=11000, message='E11000 duplicate key error collection: June_Five.tableTwo index: id dup key: { : "1" }', details={ }}].
This error means a duplicate key was hit on the id index, i.e. duplicate data was inserted. Why data gets duplicated varies from case to case; here it took a long look at the code to find the cause: in the handler class, List<Document> list = new ArrayList<>() was in the wrong place, declared outside the parse() method as a field. The list was therefore never cleared between batches; every call to parse() appended to the same list, so each write to Mongo re-inserted all documents from earlier batches, producing duplicates and the error above.
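
A minimal sketch of the broken version next to the fix (class name BrokenTrainHandler is made up for illustration; imports as in TrainHandlerMongo above):

// Broken: the list is a field, shared across calls, so every batch returns
// all documents seen so far and Mongo re-inserts old ones -> E11000
public class BrokenTrainHandler implements IParseRecordMongo {
    List<Document> list = new ArrayList<>(); // WRONG: never cleared between batches
    @Override
    public List<Document> parse(ConsumerRecords<String, String> records) {
        for (ConsumerRecord<String, String> record : records) {
            list.add(new Document("value", record.value()));
        }
        return list; // still holds every earlier batch
    }
}

// Fixed (as in TrainHandlerMongo above): declare the list inside parse(),
// so each call starts from an empty list:
// List<Document> list = new ArrayList<>();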