服务器要求:jdk-8u121-linux-x64.tar.gz、kafka_2.12-0.10.2.1.tgz、spark-1.3.1-bin-hadoop2-without-hive.tgz
1、生产者—SpringMVC+Kafka
1.1、准备工作
所需资源:kafka_2.10-0.8.2.2.jar、kafka-clients-0.10.0.0.jar,把这两个jar包导入到项目中
1.2、配置
关于生产者Kafka服务器的配置,如下:
# Kafka producer client settings (the producer code loads them from the classpath resource /kafka.properties).
# Broker address used for the initial connection to the cluster.
bootstrap.servers=172.17.0.2:9092
# Require acknowledgement from all in-sync replicas before a send counts as successful.
acks=all
# Number of times a failed send is retried.
retries=3
# Upper bound, in bytes, of one per-partition batch (16384 = 16 KiB).
batch.size=16384
# Milliseconds to wait for more records before sending a batch.
linger.ms=1
# Total bytes the producer may use to buffer unsent records (33554432 = 32 MiB).
buffer.memory=33554432
# Record keys and values are both serialized as Strings.
key.serializer=org.apache.kafka.common.serialization.StringSerializer
value.serializer=org.apache.kafka.common.serialization.StringSerializer
1.3、编写代码
1.3.1、线程池工厂
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
/**
 * Singleton factory for the thread pools used by the Kafka producer code.
 *
 * <p>Each {@code create*} method returns a brand-new executor; the factory keeps no
 * reference to the pools it hands out, so the caller owns the returned executor and is
 * responsible for shutting it down. The factory itself is stateless and therefore safe
 * to use from several threads.
 */
public class ExecutorServiceFactory {

    /** Stateless, so one shared instance serves every pool created here. */
    private static final ThreadFactory THREAD_FACTORY = new KafkaProducterThreadFactory();

    private ExecutorServiceFactory() {
    }

    /**
     * Returns the single factory instance (created lazily on first use).
     *
     * @return the shared factory
     */
    public static final ExecutorServiceFactory getInstance() {
        return KafkaProducerPoolHolder.instance;
    }

    /** Initialization-on-demand holder: the instance is built when this class is first loaded. */
    private static class KafkaProducerPoolHolder {
        private static final ExecutorServiceFactory instance = new ExecutorServiceFactory();
    }

    /**
     * Creates a scheduled thread pool whose core size is ten times the number of
     * available processors.
     *
     * @return a new scheduled executor
     */
    public ExecutorService createScheduledThreadPool() {
        int availableProcessors = Runtime.getRuntime().availableProcessors();
        return Executors.newScheduledThreadPool(availableProcessors * 10, THREAD_FACTORY);
    }

    /**
     * Creates an executor backed by a single worker thread.
     *
     * @return a new single-thread executor
     */
    public ExecutorService createSingleThreadExecutor() {
        return Executors.newSingleThreadExecutor(THREAD_FACTORY);
    }

    /**
     * Creates a cached thread pool.
     *
     * @return a new cached executor
     */
    public ExecutorService createCachedThreadPool() {
        return Executors.newCachedThreadPool(THREAD_FACTORY);
    }

    /**
     * Creates a fixed-size thread pool.
     *
     * @param count number of worker threads
     * @return a new fixed-size executor
     */
    public ExecutorService createFixedThreadPool(int count) {
        return Executors.newFixedThreadPool(count, THREAD_FACTORY);
    }

    /**
     * Creates plain worker threads. Static so that it does not hold a hidden reference
     * to the enclosing factory.
     */
    private static final class KafkaProducterThreadFactory implements ThreadFactory {
        @Override
        public Thread newThread(Runnable runnable) {
            // Thread(Runnable) already picks the security manager's thread group when one
            // is installed and the creating thread's group otherwise, so no manual lookup
            // is needed.
            return new Thread(runnable);
        }
    }
}
1.3.2 、生产者线程池
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
/**
 * Process-wide pool on which producer tasks are run. Wraps a two-thread fixed pool
 * obtained from {@code ExecutorServiceFactory}.
 */
public class KafkaProducerPool {

    /** Backing fixed pool; created once when the singleton is built. */
    private final ExecutorService workers;

    private KafkaProducerPool() {
        ExecutorServiceFactory factory = ExecutorServiceFactory.getInstance();
        this.workers = factory.createFixedThreadPool(2);
    }

    /**
     * Returns the shared pool, creating it on first access.
     *
     * @return the singleton pool
     */
    public static final KafkaProducerPool getInstance() {
        return Holder.INSTANCE;
    }

    /** Lazy holder: the pool is instantiated when this nested class is first loaded. */
    private static class Holder {
        private static final KafkaProducerPool INSTANCE = new KafkaProducerPool();
    }

    /**
     * Shuts the pool down. Tasks that were already submitted are still executed; the
     * call delegates to {@link ExecutorService#shutdown()}.
     */
    public void shutdown() {
        workers.shutdown();
    }

    /**
     * Submits a runnable task and returns a handle to it.
     *
     * @param task the work to run
     * @return a future that can be used to wait for, or cancel, the task
     */
    public Future<?> submit(Runnable task) {
        Future<?> handle = workers.submit(task);
        return handle;
    }

    /**
     * Submits a value-returning task.
     *
     * @param task the work to run
     * @return a future yielding the task's result
     */
    public Future<?> submit(Callable<?> task) {
        Future<?> handle = workers.submit(task);
        return handle;
    }

    /**
     * Hands a task to the pool without returning a handle to it.
     *
     * @param task the work to run
     */
    public void execute(Runnable task) {
        workers.execute(task);
    }
}
1.3.3、生产者
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import com.cda91.common.HttpJResopnse;
public class KafkaProducerT implements Runnable {
private static Producer<String, String> producer;
private String topic = "kafka_producer_title";
private static Map<String, List<Object>> messages;
private static List<String> keys;
private String key = null;
private Object value = null;
private static Properties props;
private static final String productPro = "/kafka.properties";
private KafkaProducterListener listener;
private KafkaProducerT() {
System.out.println("KafkaProducerT构造函数被调用");
props = new Properties();
messages = new HashMap<String, List<Object>>();
keys = new ArrayList<String>();
try {
InputStream is = KafkaProducerT.class.getResourceAsStream(productPro);
props.load(is);
is.close();
is = null;
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
producer = new KafkaProducer<String, String>(props);
}
public static final KafkaProducerT getInstance() {
return KafkaProducerTHolder.instance;
}
private static class KafkaProducerTHolder{
private static final KafkaProducerT instance = new KafkaProducerT();
}
public void setTopic(String topic) {
this.topic = topic;
}
public void push(String key,Object value){
if(!keys.contains(key)) {
keys.add(key);
}
if(!messages.containsKey(key)) {
messages.put(key, new ArrayList<Object>());
}
messages.get(key).add(value);
}
public void push(String key,Object value,KafkaProducterListener listener) {
if(!keys.contains(key)) {
keys.add(key);
}
if(!messages.containsKey(key)) {
messages.put(key, new ArrayList<Object>());
}
messages.get(key).add(value);
this.listener = listener;
}
@Override
public void run() {
while (true) {
if(keys.size() == 0 || messages.size()==0) {
producer.flush();
continue;
}
key = keys.get(0);
value = messages.get(key).get(0);
messages.get(key).remove(value);
if(messages.get(key).size() == 0) {
messages.remove(key);
}
if(!messages.containsKey(key)) {
keys.remove(key);
}
producer.send(new ProducerRecord<String, String>(topic,
key+"=>"+value.toString()),new Callback() {
public void onCompletion(RecordMetadata metadata, Exception exception) {
System.out.println("topic=>"+topic+",key="+key+",value="+value);
if(exception == null) {
if(listener != null) {
listener.onComplete(new HttpJResopnse());
}
}else {
HttpJResopnse resopnse = new HttpJResopnse();
resopnse.setStatus(HttpJResopnse.ERROR);
resopnse.setMessage(exception.getMessage());
listener.onComplete(resopnse);
}
}
});
}
}
public static void main(String[] args) {
KafkaProducerT kafkaProducerT = KafkaProducerT.getInstance();
KafkaProducerPool kafkaProducerPool = KafkaProducerPool.getInstance();
kafkaProducerPool.execute(kafkaProducerT);
kafkaProducerT.setTopic("test");
kafkaProducerT.push("name", "wangyui");
}
public interface KafkaProducterListener {
public void onComplete(HttpJResopnse resopnse);
}
}
2、消费者—Kafka+Maven
2.1、pom.xml相关
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Build descriptor for the Spark Streaming consumer; produces the jar sparkcda-0.0.1.jar. -->
<modelVersion>4.0.0</modelVersion>
<groupId>com.cda</groupId>
<artifactId>sparkcda</artifactId>
<version>0.0.1</version>
<packaging>jar</packaging>
<name>sparkcda</name>
<url>http://maven.apache.org</url>
<properties>
<!-- Compile sources as UTF-8 regardless of the platform default. -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- Unit testing only; not part of the runtime classpath. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
<!-- Spark core and streaming are 'provided': needed to compile, expected to be supplied by the Spark installation at run time. -->
<dependency> <!-- Spark dependency -->
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>1.3.1</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.10</artifactId>
<version>1.3.1</version>
<scope>provided</scope>
</dependency>
<!-- Kafka 0.8.2.1 built for Scala 2.10, the same Scala version as the Spark artifacts in this file. -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.10</artifactId>
<version>0.8.2.1</version>
</dependency>
<!-- Spark's Kafka integration module for Spark Streaming 1.3.1. -->
<!-- NOTE(review): with plain jar packaging the non-provided dependencies are not bundled into the artifact; they presumably have to be supplied at submit time. Confirm against the deployment. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka_2.10</artifactId>
<version>1.3.1</version>
</dependency>
<!-- ZooKeeper client library. -->
<dependency>
<groupId>com.101tec</groupId>
<artifactId>zkclient</artifactId>
<version>0.10</version>
</dependency>
<!-- Yammer metrics library. -->
<dependency>
<groupId>com.yammer.metrics</groupId>
<artifactId>metrics-core</artifactId>
<version>2.2.0</version>
</dependency>
</dependencies>
</project>
2.2 代码编写
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaPairInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;
import kafka.serializer.StringDecoder;
import scala.Tuple2;
/**
 * Spark Streaming job that reads text messages from a Kafka topic and prints a
 * per-batch word count.
 */
public class AccessStream {

    /** Separator used to break a message into words. */
    private static final Pattern SPACE = Pattern.compile(" ");

    public static void main(String[] args) {
        consurmer();
    }

    /**
     * Builds and runs the streaming pipeline: Kafka direct stream, message values,
     * words, (word, 1) pairs, summed counts, printed per batch. Blocks until the
     * streaming context terminates.
     */
    public static void consurmer() {
        final String brokerList = "localhost:9092";
        final String topicList = "cda_pv";

        // Local-mode context with a 20 second batch interval.
        SparkConf conf = new SparkConf().setAppName("AccessStream").setMaster("local[*]");
        JavaStreamingContext streamingContext = new JavaStreamingContext(conf, Durations.seconds(20));

        // Topics are given as a comma-separated list.
        String[] topicArray = topicList.split(",");
        Set<String> topicNames = new HashSet<String>(Arrays.asList(topicArray));

        Map<String, String> kafkaConfig = new HashMap<String, String>();
        kafkaConfig.put("metadata.broker.list", brokerList);

        // Direct (receiver-less) stream of (key, value) string pairs from the brokers.
        JavaPairInputDStream<String, String> records = KafkaUtils.createDirectStream(
                streamingContext,
                String.class,
                String.class,
                StringDecoder.class,
                StringDecoder.class,
                kafkaConfig,
                topicNames);

        JavaDStream<String> lines = records.map(new MessageValue());
        JavaDStream<String> words = lines.flatMap(new SplitOnSpace());
        JavaPairDStream<String, Integer> ones = words.mapToPair(new PairWithOne());
        JavaPairDStream<String, Integer> wordCounts = ones.reduceByKey(new Sum());
        wordCounts.print();

        // Kick off processing and wait for it to end.
        streamingContext.start();
        streamingContext.awaitTermination();
    }

    /** Extracts the value (second element) of a Kafka (key, value) record. */
    private static final class MessageValue implements Function<Tuple2<String, String>, String> {
        private static final long serialVersionUID = 1L;

        @Override
        public String call(Tuple2<String, String> record) {
            return record._2();
        }
    }

    /** Splits a line into words on single spaces. */
    private static final class SplitOnSpace implements FlatMapFunction<String, String> {
        private static final long serialVersionUID = 1L;

        @Override
        public Iterable<String> call(String line) {
            String[] parts = SPACE.split(line);
            return Arrays.asList(parts);
        }
    }

    /** Maps a word to the pair (word, 1). */
    private static final class PairWithOne implements PairFunction<String, String, Integer> {
        private static final long serialVersionUID = 1L;

        @Override
        public Tuple2<String, Integer> call(String word) {
            return new Tuple2<String, Integer>(word, 1);
        }
    }

    /** Adds two partial counts. */
    private static final class Sum implements Function2<Integer, Integer, Integer> {
        private static final long serialVersionUID = 1L;

        @Override
        public Integer call(Integer left, Integer right) {
            return left + right;
        }
    }
}
3、服务器搭建
3.1、软件安装
解压jdk-8u121-linux-x64.tar.gz、kafka_2.12-0.10.2.1.tgz、spark-1.3.1-bin-hadoop2-without-hive.tgz 到 /usr/local,并加入到PATH中(这些都懂得,不必赘述)
3.2、相关配置
echo -e "export JAVA_HOME=$JAVA_HOME" >> $SPARK_HOME/sbin/spark-config.sh
echo -e "advertised.listeners=PLAINTEXT://你的服务器IP:9092" >> $KAFKA_HOME/config/server.properties
3.3、测试
spark-submit --class com.cda.stream.AccessStream --jars /usr/local/spark-1.3.1-bin-hadoop2-without-hive/lib/kafka_2.10-0.8.2.1.jar,/usr/local/spark-1.3.1-bin-hadoop2-without-hive/lib/spark-streaming-kafka_2.10-1.3.1.jar,/usr/local/kafka_2.12-0.10.2.1/libs/metrics-core-2.2.0.jar /opt/sparkcda-0.0.1.jar