Code that reads files from a directory and writes them to Kafka
package lm;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
public class KProducer {

    private String topic;
    private String emlPath;
    private String kafkaUrl;
    private Producer<String, byte[]> producer;

    public KProducer(String topic, String emlPath, String kafkaUrl) {
        this.topic = topic;
        this.emlPath = emlPath;
        this.kafkaUrl = kafkaUrl;
        System.out.println("topic:" + topic);
        System.out.println("emlPath:" + emlPath);
    }
    // Build the producer: String keys, raw byte[] values.
    public void init() {
        Properties props = new Properties();
        props.put("bootstrap.servers", kafkaUrl);
        props.put("acks", "all");
        props.put("retries", 0);
        props.put("batch.size", 16384);
        props.put("linger.ms", 1);
        props.put("buffer.memory", 33554432);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer");
        producer = new KafkaProducer<String, byte[]>(props);
    }
    // Scan the directory once a second, publish each file as a single byte[] record, then delete it.
    public void start() throws InterruptedException {
        File dir = new File(emlPath);
        if (!dir.exists()) {
            System.out.println("emlPath not found: " + emlPath);
            return;
        }
        while (true) {
            for (File f : dir.listFiles()) {
                System.out.println("get file name: " + f.getName());
                try {
                    FileInputStream fin = new FileInputStream(f);
                    byte[] b = new byte[(int) f.length()];
                    int read = 0;
                    // read() may return fewer bytes than requested, so loop until the whole file is buffered
                    while (read < b.length) {
                        int r = fin.read(b, read, b.length - read);
                        if (r < 0) {
                            break;
                        }
                        read += r;
                    }
                    System.out.println("byte length: " + b.length);
                    ProducerRecord<String, byte[]> msg = new ProducerRecord<String, byte[]>(topic, b);
                    producer.send(msg);
                    fin.close();
                    f.delete();
                } catch (FileNotFoundException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            Thread.sleep(1000);
        }
    }
    public static void main(String[] args) throws InterruptedException {
        if (args.length != 3) {
            System.out.println("args[0]: topic");
            System.out.println("args[1]: emlPath");
            System.out.println("args[2]: kafkaUrl");
            return;
        }
        KProducer kp = new KProducer(args[0], args[1], args[2]);
        kp.init();
        kp.start();
    }
}
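The producer above deletes each file right after a fire-and-forget send, so a record that never reaches the broker still disappears from disk. Below is a minimal sketch of the same send-and-delete step with a delivery callback; the deleteAfterAck helper name is hypothetical, and it assumes it lives inside KProducer and uses its topic and producer fields.

    // Hypothetical helper inside KProducer: delete the file only after the broker acknowledges the record.
    private void deleteAfterAck(final File f, byte[] payload) {
        ProducerRecord<String, byte[]> msg = new ProducerRecord<String, byte[]>(topic, payload);
        producer.send(msg, (metadata, exception) -> {
            if (exception != null) {
                // Send failed: keep the file so the next directory scan can retry it.
                exception.printStackTrace();
            } else {
                System.out.println("sent, partition=" + metadata.partition() + " offset=" + metadata.offset());
                f.delete();
            }
        });
    }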
Pulling data from Kafka and writing it to files
package lm;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
public class KConsumer {

    private String topic;
    private String kafkaUrl;
    private String toPath;
    private KafkaConsumer<String, byte[]> consumer;

    public KConsumer(String topic, String kafkaUrl, String path) {
        this.topic = topic;
        this.kafkaUrl = kafkaUrl;
        this.toPath = path;
    }
    // Build the consumer and subscribe to the topic; offsets are auto-committed every second.
    public void init() {
        Properties props = new Properties();
        props.put("bootstrap.servers", kafkaUrl);
        props.put("group.id", topic + "_test");
        props.put("enable.auto.commit", "true");
        props.put("auto.commit.interval.ms", "1000");
        props.put("auto.offset.reset", "latest"); // earliest, latest, none
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
        consumer = new KafkaConsumer<String, byte[]>(props);
        System.out.println("subscribe topic:" + topic);
        consumer.subscribe(Arrays.asList(topic));
    }
    // Poll in a loop and dump every record value to its own file.
    public void run() {
        while (true) {
            try {
                ConsumerRecords<String, byte[]> records = consumer.poll(1000);
                if (records.count() == 0) {
                    System.out.println("records empty");
                    continue;
                }
                for (ConsumerRecord<String, byte[]> record : records) {
                    // System.out.printf("offset = %d, key = %s\n", record.offset(), record.key());
                    try {
                        write(record.value());
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    // Write one record to a uniquely named file under toPath.
    public void write(byte[] b) {
        String f = this.toPath + File.separator + "Test_" + System.currentTimeMillis() + ".test";
        try {
            FileOutputStream out = new FileOutputStream(new File(f));
            out.write(b);
            out.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    public static void main(String[] args) {
        if (args.length != 3) {
            System.out.println("args[0]: topic, args[1]: kafkaUrl, args[2]: toPath");
            return;
        }
        KConsumer kc = new KConsumer(args[0], args[1], args[2]);
        kc.init();
        kc.run();
    }
}
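Because enable.auto.commit is true, offsets can be committed before write() has put the corresponding records on disk, so a crash between poll and write can silently drop data. Below is a minimal sketch of committing manually after each batch is written; it assumes enable.auto.commit is switched to "false" in init(), and the runWithManualCommit method name is hypothetical.

    // Hypothetical variant of run() inside KConsumer: commit offsets only after the whole batch is on disk.
    public void runWithManualCommit() {
        while (true) {
            ConsumerRecords<String, byte[]> records = consumer.poll(1000);
            if (records.isEmpty()) {
                continue;
            }
            for (ConsumerRecord<String, byte[]> record : records) {
                write(record.value());
            }
            // With enable.auto.commit=false, this marks the batch as processed only after all files exist.
            consumer.commitSync();
        }
    }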