1 Kafka Producer
1.1 Parent POM
<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.8.1</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
                <encoding>UTF-8</encoding>
            </configuration>
        </plugin>
        <plugin>
            <groupId>net.alchim31.maven</groupId>
            <artifactId>scala-maven-plugin</artifactId>
            <version>3.1.6</version>
            <configuration>
                <scalaCompatVersion>2.11</scalaCompatVersion>
                <scalaVersion>2.11.12</scalaVersion>
                <encoding>UTF-8</encoding>
            </configuration>
            <executions>
                <execution>
                    <id>compile-scala</id>
                    <phase>compile</phase>
                    <goals>
                        <goal>add-source</goal>
                        <goal>compile</goal>
                    </goals>
                </execution>
                <execution>
                    <id>test-compile-scala</id>
                    <phase>test-compile</phase>
                    <goals>
                        <goal>add-source</goal>
                        <goal>testCompile</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-assembly-plugin</artifactId>
            <version>2.6</version>
            <configuration>
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
                <archive>
                    <manifest>
                        <mainClass></mainClass>
                    </manifest>
                </archive>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
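The build section above only sets up compilation and packaging. The dependencies the producer and consumer below rely on (Flink streaming, the Kafka 0.11 connector, fastjson, and Jedis for the Redis source) are not listed in the original. A minimal sketch of what the dependency list might look like follows; all version numbers are assumptions, not taken from the original project:

<dependencies>
    <!-- Flink DataStream API (version assumed; any 1.x release that still
         ships FlinkKafkaConsumer011/FlinkKafkaProducer011 should work) -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_2.11</artifactId>
        <version>1.9.1</version>
    </dependency>
    <!-- Kafka 0.11 connector, matching the 011 consumer/producer used below -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
        <version>1.9.1</version>
    </dependency>
    <!-- fastjson, used by DataClean to parse the producer's JSON messages -->
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.62</version>
    </dependency>
    <!-- Jedis, for the Redis-backed MyRedisSource -->
    <dependency>
        <groupId>redis.clients</groupId>
        <artifactId>jedis</artifactId>
        <version>2.9.0</version>
    </dependency>
</dependencies>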
1.2 log4j Configuration
log4j.rootLogger=info,stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} [%t] [%c] [%p] - %m%n
1.3 Custom Producer
package com.tzb.utils;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Properties;
import java.util.Random;

public class MyKafkaProducer {

    public static void main(String[] args) throws InterruptedException {
        Properties prop = new Properties();
        prop.put("bootstrap.servers", "master:9092");
        prop.put("key.serializer", StringSerializer.class.getName());
        prop.put("value.serializer", StringSerializer.class.getName());

        String topic = "allDate";
        KafkaProducer<String, String> producer = new KafkaProducer<>(prop);

        // Emit one randomly generated JSON message every 2 seconds.
        while (true) {
            String message = "{\"dt\":\"" + getCurrentTime()
                    + "\",\"countryCode\":\"" + getCountryCode()
                    + "\",\"data\":[{\"type\":\"" + getRandomType()
                    + "\",\"score\":" + getRandomScore()
                    + ",\"level\":\"" + getRandomLevel() + "\"},"
                    + "{\"type\":\"" + getRandomType()
                    + "\",\"score\":" + getRandomScore()
                    + ",\"level\":\"" + getRandomLevel() + "\"}]}";
            System.out.println(message);
            producer.send(new ProducerRecord<String, String>(topic, message));
            Thread.sleep(2000);
        }
    }

    public static String getCurrentTime() {
        // "yyyy", not "YYYY": uppercase Y is the week-based year and produces
        // wrong dates around the turn of the year.
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        return sdf.format(new Date());
    }

    public static String getCountryCode() {
        String[] types = {"US", "TW", "HK", "PK", "KW", "SA", "IN"};
        Random random = new Random();
        int i = random.nextInt(types.length);
        return types[i];
    }

    public static String getRandomType() {
        String[] types = {"s1", "s2", "s3", "s4", "s5"};
        Random random = new Random();
        int i = random.nextInt(types.length);
        return types[i];
    }

    public static double getRandomScore() {
        double[] scores = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0};
        Random random = new Random();
        int i = random.nextInt(scores.length);
        return scores[i];
    }

    public static String getRandomLevel() {
        String[] levels = {"A", "B", "C", "D", "E"};
        Random random = new Random();
        int i = random.nextInt(levels.length);
        return levels[i];
    }
}
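Each message is a two-element batch wrapped with a timestamp and a country code. A sample line as printed by the producer (the values are random, so yours will differ):

{"dt":"2019-08-30 17:10:02","countryCode":"US","data":[{"type":"s2","score":0.8,"level":"A"},{"type":"s5","score":0.3,"level":"D"}]}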
1.4 Consumer
package com.tzb;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.tzb.source.MyRedisSource;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.CoFlatMapFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011;
import org.apache.flink.streaming.util.serialization.KeyedSerializationSchemaWrapper;
import org.apache.flink.util.Collector;

import java.util.HashMap;
import java.util.Properties;

public class DataClean {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Checkpoint every 60s with exactly-once semantics, and retain
        // externalized checkpoints when the job is cancelled.
        env.enableCheckpointing(60000);
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        env.getCheckpointConfig().setMinPauseBetweenCheckpoints(30000);
        env.getCheckpointConfig().setCheckpointTimeout(10000);
        env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
        env.getCheckpointConfig().enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        // Source 1: raw messages from Kafka.
        String topic = "allData";
        Properties prop = new Properties();
        prop.setProperty("bootstrap.servers", "master:9092");
        prop.setProperty("group.id", "con1");
        FlinkKafkaConsumer011<String> myConsumer =
                new FlinkKafkaConsumer011<String>(topic, new SimpleStringSchema(), prop);
        DataStream<String> data = env.addSource(myConsumer);

        // Source 2: countryCode -> area mapping periodically refreshed from Redis.
        DataStream<HashMap<String, String>> mapData = env.addSource(new MyRedisSource());

        DataStream<String> resData = data.connect(mapData)
                .flatMap(new CoFlatMapFunction<String, HashMap<String, String>, String>() {

            // Latest mapping seen on the second input. Note: with parallelism > 1
            // the map stream would need to be broadcast for every subtask to see it.
            private HashMap<String, String> allMap = new HashMap<String, String>();

            @Override
            public void flatMap1(String s, Collector<String> out) throws Exception {
                // Split each incoming batch into one flattened record per element,
                // enriched with the area looked up from the Redis mapping.
                JSONObject jsonObject = JSON.parseObject(s);
                String dt = jsonObject.getString("dt");
                String countryCode = jsonObject.getString("countryCode");
                String area = allMap.get(countryCode);
                JSONArray jsonArray = jsonObject.getJSONArray("data");
                for (int i = 0; i < jsonArray.size(); i++) {
                    JSONObject jsonObject1 = jsonArray.getJSONObject(i);
                    System.out.println("area:---" + area + "-------------");
                    jsonObject1.put("area", area);
                    jsonObject1.put("dt", dt);
                    out.collect(jsonObject1.toJSONString());
                }
            }

            @Override
            public void flatMap2(HashMap<String, String> value, Collector<String> collector) throws Exception {
                this.allMap = value;
            }
        });

        // Sink: write the cleaned records back to Kafka with exactly-once semantics.
        String outTopic = "allDataClean";
        Properties outProp = new Properties();
        outProp.setProperty("bootstrap.servers", "master:9092");
        // Must not exceed the broker's transaction.max.timeout.ms.
        outProp.setProperty("transaction.timeout.ms", 60000 * 15 + "");
        FlinkKafkaProducer011<String> myProducer = new FlinkKafkaProducer011<>(outTopic,
                new KeyedSerializationSchemaWrapper<String>(new SimpleStringSchema()),
                outProp,
                FlinkKafkaProducer011.Semantic.EXACTLY_ONCE);
        resData.addSink(myProducer);

        env.execute("DataClean");
    }
}
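MyRedisSource is referenced above but its code is not listed. A minimal sketch of what it could look like, assuming the mapping lives in a Redis hash named areas whose fields are area names and whose values are comma-separated country codes; the hash name, host, port, and refresh interval are all assumptions:

package com.tzb.source;

import org.apache.flink.streaming.api.functions.source.SourceFunction;
import redis.clients.jedis.Jedis;

import java.util.HashMap;
import java.util.Map;

/**
 * Periodically reads an area -> "countries" hash from Redis, inverts it into
 * a countryCode -> area map, and emits the whole map downstream.
 */
public class MyRedisSource implements SourceFunction<HashMap<String, String>> {

    private volatile boolean isRunning = true;
    private Jedis jedis;

    @Override
    public void run(SourceContext<HashMap<String, String>> ctx) throws Exception {
        this.jedis = new Jedis("master", 6379); // host/port assumed
        while (isRunning) {
            HashMap<String, String> map = new HashMap<String, String>();
            // e.g. field "AREA_US" -> value "US", field "AREA_AR" -> value "PK,KW,SA"
            Map<String, String> areas = jedis.hgetAll("areas");
            for (Map.Entry<String, String> entry : areas.entrySet()) {
                String area = entry.getKey();
                for (String country : entry.getValue().split(",")) {
                    map.put(country, area); // invert: countryCode -> area
                }
            }
            if (!map.isEmpty()) {
                ctx.collect(map);
            }
            Thread.sleep(60000); // refresh once a minute (interval assumed)
        }
    }

    @Override
    public void cancel() {
        isRunning = false;
        if (jedis != null) {
            jedis.close();
        }
    }
}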
1.5 Testing
Start a console consumer on the output topic:
bin/kafka-console-consumer.sh --bootstrap-server master:9092 --topic allDataClean
Then start the Flink job DataClean, and finally the producer MyKafkaProducer.
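If topic auto-creation is disabled on the broker, the two topics have to be created first (assuming a Kafka 0.11-era cluster with ZooKeeper at master:2181; adjust to your environment):

bin/kafka-topics.sh --create --zookeeper master:2181 --replication-factor 1 --partitions 1 --topic allData
bin/kafka-topics.sh --create --zookeeper master:2181 --replication-factor 1 --partitions 1 --topic allDataClean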
Error analysis: the console consumer prints nothing. The cause is in the code above: the producer writes to topic allDate, while DataClean subscribes to allData, so the job never receives any input.
Fix: make both sides use the same topic, e.g. change String topic = "allDate" in MyKafkaProducer to "allData".
Verify again: restart MyKafkaProducer and DataClean; the console consumer on allDataClean now prints the cleaned records.
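Each two-element message from the producer comes out as two flattened records, roughly like the following (the exact key order depends on fastjson, and the area value depends on what MyRedisSource loads; AREA_US is assumed here):

{"type":"s2","score":0.8,"level":"A","area":"AREA_US","dt":"2019-08-30 17:10:02"}
{"type":"s5","score":0.3,"level":"D","area":"AREA_US","dt":"2019-08-30 17:10:02"}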