1.新建maven工程——mykafka
2.配置pom.xml文件。
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<kafka.version>2.0.0</kafka.version>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.11</artifactId>
<version>${kafka.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>${kafka.version}</version>
</dependency>
</dependencies>
<build>
<finalName>mykafka</finalName>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<archive>
<manifest>
<mainClass>org.alisa.common.MyExecute</mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
3.创建一个接口类——Dbinfo.java
package cn.alisa.commons;
import java.util.Map;
/**
 * Connection settings for the data sink (here: a Kafka cluster / topic).
 *
 * NOTE(review): this interface is declared in package {@code cn.alisa.commons}
 * while its implementation (KafkaConfiguration) and all callers live in
 * {@code org.alisa.common} — confirm the intended package, otherwise the
 * implementation cannot see this type without an explicit import.
 */
public interface Dbinfo {
    /** @return host/IP of the broker to connect to */
    String getIp();

    /** @return TCP port of the broker */
    int getPort();

    /** @return target "database" name — used as the Kafka topic name here */
    String dbName();

    /** @return extra connection options; may be {@code null} when none are needed */
    Map<String, String> getOther();
}
4.创建配置项类——KafkaConfiguration.java,实现上述接口。
package org.alisa.common;
import java.util.Map;
/**
 * Bean holding the Kafka connection settings (broker ip/port and topic name).
 * MyExecute populates it from the command-line arguments via the setters.
 */
public class KafkaConfiguration implements Dbinfo {
    private String ip;
    private int port;
    private String dbname;

    /** Broker host/IP — this is the method declared by {@link Dbinfo}. */
    @Override
    public String getIp() {
        return ip;
    }

    public void setIp(String ip) {
        this.ip = ip;
    }

    public void setPort(int port) {
        this.port = port;
    }

    public String getDbname() {
        return dbname;
    }

    public void setDbname(String dbname) {
        this.dbname = dbname;
    }

    /**
     * Alias kept for source compatibility with callers that used the
     * mis-cased name. FIX: the original annotated this with {@code @Override},
     * which does not compile — Dbinfo declares {@code getIp()}, not
     * {@code getIP()}.
     */
    public String getIP() {
        return this.ip;
    }

    @Override
    public int getPort() {
        return this.port;
    }

    @Override
    public String dbName() {
        return this.dbname;
    }

    /** No extra options are needed by this producer. */
    @Override
    public Map<String, String> getOther() {
        return null;
    }
}
5.创建KafkaConnector.java,用来以生产者身份发送消息。
package org.alisa.common;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.DescribeTopicsResult;
import org.apache.kafka.clients.admin.KafkaAdminClient;
import org.apache.kafka.clients.admin.TopicDescription;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.KafkaFuture;
import org.apache.kafka.common.serialization.StringSerializer;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.concurrent.ExecutionException;
/**
 * Splits a local text file into one slice per Kafka partition and starts one
 * producer thread (UserKafkaProducer) per slice.
 *
 * NOTE(review): the offset arithmetic assumes a UTF-8 file with CRLF line
 * endings (the tutorial runs the jar on Windows) — see getPos(). Confirm
 * before using with LF-only files.
 */
public class KafkaConnector {
    private Dbinfo dbinfo;
    // Total number of lines in the input file (filled by getFileInfo).
    private int totalRow = 0;
    // rowSize.get(i) = cumulative byte length of the first i lines, counting
    // one byte per line terminator; rowSize.get(0) == 0 after getFileInfo.
    private List<Integer> rowSize = new ArrayList<>();
    Properties prop = new Properties();

    /**
     * Builds the shared producer configuration from the connection info.
     *
     * @param info broker ip/port and topic name
     */
    public KafkaConnector(Dbinfo info) {
        this.dbinfo = info;
        // FIX: was dbinfo.getIP(), which the Dbinfo interface never declares
        // (it declares getIp()) — the original did not compile.
        prop.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, dbinfo.getIp() + ":" + dbinfo.getPort());
        prop.put(ProducerConfig.ACKS_CONFIG, "all");
        // Number of retries after a failed send; 0 is also the default.
        prop.put(ProducerConfig.RETRIES_CONFIG, "0");
        prop.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getTypeName());
        prop.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getTypeName());
    }

    /**
     * Sends the whole file to the topic: scans the file, queries the topic's
     * partition count, splits the file into that many slices, and starts one
     * producer thread per slice.
     *
     * @param path path of the input file
     * @throws FileNotFoundException if the file cannot be opened
     */
    public void sendkafka(String path) throws FileNotFoundException {
        // Collect total line count and per-line cumulative byte offsets.
        getFileInfo(path);
        int partitionSize = getTopicPartitionNumber();
        if (partitionSize <= 0) {
            // FIX: the original divided by this value and crashed with an
            // ArithmeticException when the topic lookup failed or returned 0.
            throw new IllegalStateException(
                    "topic " + dbinfo.dbName() + " has no partitions or could not be described");
        }
        // Map of start-byte-position -> number of lines for each thread.
        Map<Long, Integer> threadInfo = calcPosAndRow(partitionSize);
        int name = 0;
        for (Long key : threadInfo.keySet()) {
            // The numeric thread name doubles as the record key, which
            // SimplePartition parses to choose the target partition.
            new UserKafkaProducer(name + "", key, threadInfo.get(key), prop, dbinfo.dbName(), path).start();
            name++;
        }
    }

    /**
     * Computes, for each of the partitionnum threads, the byte position where
     * its slice starts and how many lines it must send.
     *
     * @param partitionnum number of threads (= topic partitions)
     * @return map of start byte position -> line count
     */
    private Map<Long, Integer> calcPosAndRow(int partitionnum) {
        Map<Long, Integer> result = new HashMap<>();
        // Lines per thread; the last thread also takes the remainder.
        int rows = totalRow / partitionnum;
        for (int i = 0; i < partitionnum; i++) {
            if (i == (partitionnum - 1)) {
                // Last slice: average share plus the leftover lines.
                result.put(getPos(rows * i + 1), rows + totalRow % partitionnum);
            } else {
                result.put(getPos(rows * i + 1), rows);
            }
        }
        return result;
    }

    /**
     * Returns the byte position at which the given (1-based) line starts.
     *
     * rowSize counts one terminator byte per line; the extra (lineNumber - 1)
     * bytes account for the '\r' of CRLF endings in Windows input files —
     * NOTE(review): for LF-only files this offset would overshoot; confirm
     * the expected input format.
     *
     * @param lineNumber 1-based line number
     * @return byte offset of that line's first character
     */
    private Long getPos(int lineNumber) {
        return (long) rowSize.get(lineNumber - 1) + (lineNumber - 1);
    }

    /**
     * Scans the file once, recording the total line count in totalRow and the
     * cumulative byte length after each line in rowSize.
     *
     * FIX: the original used FileReader and String.getBytes(), both of which
     * depend on the platform default charset; the file is later decoded as
     * UTF-8 by UserKafkaProducer, so the offsets must be computed in UTF-8 too
     * (otherwise Chinese text breaks the arithmetic on e.g. GBK Windows).
     *
     * @param path path of the input file
     * @throws FileNotFoundException if the file cannot be opened
     */
    private void getFileInfo(String path) throws FileNotFoundException {
        LineNumberReader reader = new LineNumberReader(
                new InputStreamReader(new FileInputStream(path), StandardCharsets.UTF_8));
        try {
            String str;
            // Cumulative byte length so far (one byte per line terminator).
            int total = 0;
            while ((str = reader.readLine()) != null) {
                total += str.getBytes(StandardCharsets.UTF_8).length + 1;
                rowSize.add(total);
            }
            totalRow = reader.getLineNumber();
            // Prepend 0 so rowSize.get(0) is the offset before line 1.
            rowSize.add(0, 0);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Asks the cluster how many partitions the target topic has.
     *
     * @return the partition count, or 0 if the lookup failed
     */
    private int getTopicPartitionNumber() {
        int count = 0;
        // FIX: the AdminClient was never closed, leaking its network threads;
        // try-with-resources guarantees the close on every path.
        try (AdminClient client = KafkaAdminClient.create(prop)) {
            DescribeTopicsResult result = client.describeTopics(Arrays.asList(dbinfo.dbName()));
            KafkaFuture<Map<String, TopicDescription>> kf = result.all();
            count = kf.get().get(dbinfo.dbName()).partitions().size();
        } catch (InterruptedException e) {
            // Preserve the interrupt flag for callers.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (ExecutionException e) {
            e.printStackTrace();
        }
        return count;
    }
}
6.创建简单分区器类——SimplePartition.java,用于让生产者将数据发送到指定分区内。
package org.alisa.common;
import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.PartitionInfo;
import java.util.List;
import java.util.Map;
/**
 * Partitioner that interprets the record key as the partition number.
 * UserKafkaProducer uses its numeric thread name ("0", "1", ...) as the key,
 * so every sender thread writes to exactly one partition.
 */
public class SimplePartition implements Partitioner {

    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        // The key is the sending thread's name, which is a partition index.
        String partitionName = key.toString();
        return Integer.parseInt(partitionName);
    }

    @Override
    public void close() {
        // Nothing to release.
    }

    @Override
    public void configure(Map<String, ?> map) {
        // No configuration is needed.
    }
}
7.创建线程UserKafkaProducer.java
package org.alisa.common;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.Properties;
/**
 * Producer thread that sends one slice of the input file to one Kafka
 * partition. The numeric thread name doubles as the record key, which
 * SimplePartition parses to pick the target partition.
 */
public class UserKafkaProducer extends Thread {
    // Byte offset of this thread's slice within the file.
    private long beginPos;
    // Number of lines this thread must send.
    private int row;
    private Properties prop;
    private String topic;
    private String path;

    /**
     * @param threadName numeric name, also used as record key / partition id
     * @param begin      byte offset where this slice starts
     * @param row        number of lines to send
     * @param prop       shared producer configuration
     * @param topic      target topic
     * @param path       input file path
     */
    public UserKafkaProducer(String threadName, long begin, int row, Properties prop, String topic, String path) {
        this.beginPos = begin;
        this.row = row;
        this.prop = prop;
        this.topic = topic;
        this.setName(threadName);
        this.path = path;
    }

    @Override
    public void run() {
        // Route records through SimplePartition: partition == key == thread name.
        prop.put("partitioner.class", "org.alisa.common.SimplePartition");
        prop.put("batch.size", 524288);
        prop.put("linger.ms", 10);
        KafkaProducer<String, String> producer = new KafkaProducer<>(prop);
        // FIX: try-with-resources closes the file on every path — the original
        // leaked the RandomAccessFile always, and the producer on exceptions.
        try (RandomAccessFile raf = new RandomAccessFile(new File(path), "r")) {
            raf.seek(beginPos);
            for (int line = 0; line < row; line++) {
                String raw = raf.readLine();
                if (raw == null) {
                    // FIX: fewer lines than expected (EOF) — the original
                    // dereferenced null and crashed; stop cleanly instead.
                    break;
                }
                // RandomAccessFile.readLine() maps each raw byte to one char
                // (latin-1); re-encode and decode as UTF-8 so Chinese text is
                // not garbled.
                String ln = new String(raw.getBytes("iso-8859-1"), "utf-8");
                ProducerRecord<String, String> record =
                        new ProducerRecord<>(topic, Thread.currentThread().getName(), ln);
                producer.send(record);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Flush buffered records and release producer resources.
            producer.close();
        }
    }
}
8.创建测试类——MyExecute.java
package org.alisa.common;
/**
 * CLI entry point: reads a local (Windows) file and produces its lines into a
 * Kafka topic. Packaged as a fat jar (see maven-assembly-plugin mainClass).
 *
 * Usage: java -jar mykafka-jar-with-dependencies.jar &lt;brokerIp&gt; &lt;brokerPort&gt; &lt;topic&gt; &lt;filePath&gt;
 */
public class MyExecute {
    public static void main(String[] args) throws Exception {
        if (args.length != 4) {
            // FIX: guard clause with a precise, grammatical message instead of
            // throwing a raw Exception ("the arguments need 4 but ...").
            throw new IllegalArgumentException(
                    "expected 4 arguments (brokerIp brokerPort topic filePath) but got " + args.length);
        }
        KafkaConfiguration configuration = new KafkaConfiguration();
        configuration.setIp(args[0]);
        configuration.setPort(Integer.parseInt(args[1]));
        configuration.setDbname(args[2]);
        // Connects to the broker and streams the file into the topic.
        new KafkaConnector(configuration).sendkafka(args[3]);
    }
}
9.生成jar包。先双击clean,再双击package。
在项目左边就会形成jar包。将jar包放到本地某个路径下。
10.在windows下执行jar包命令。不过在此之前需要先在linux下创建一个主题。
命令为:
kafka-topics.sh --zookeeper 192.168.21.130:2181 --create --topic mydemo --replication-factor 1 --partitions 3
可以打开consumer消费端命令控制台查看:
kafka-console-consumer.sh --bootstrap-server 192.168.21.130:9092 --from-beginning --topic mydemo
然后查看主题的偏移量,查看数据是否写到主题中:
kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list single:9092 --topic mydemo --time -1
现在开始执行jar包命令:
java -jar mykafka-jar-with-dependencies.jar 192.168.21.130 9092 mydemo e:/logs/log_2020-01-01.log
然后可以在consumer消费端查看到进度:
查看主题偏移量: