主要代码
package cn.nanxiuzi.kafka.kafka2mysql;

import cn.nanxiuzi.kafka.KafkaDic;
import com.google.common.collect.Lists;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

/**
 * Flink job that consumes records from Kafka, keeps the first
 * comma-separated field of each record, groups the results into
 * 10-second tumbling windows, and writes each non-empty batch to
 * MySQL through {@link MysqlSink}.
 */
public class Kafka2Mysql {

    public static final Logger logger = LoggerFactory.getLogger(Kafka2Mysql.class);

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // ---- checkpoint configuration ----
        // Trigger a checkpoint every 10 s.
        env.enableCheckpointing(10000);
        // Exactly-once processing semantics.
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        // Keep at least 1 s between the end of one checkpoint and the start of the next.
        env.getCheckpointConfig().setMinPauseBetweenCheckpoints(1000);
        // A checkpoint must finish within 10 s or it is discarded.
        env.getCheckpointConfig().setCheckpointTimeout(10000);
        // Only one checkpoint may be in flight at a time.
        env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
        // Retain checkpoint data after the job is cancelled so it can be restored later:
        //env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        // State backend defaults to in-memory; e.g. persist checkpoints to the local filesystem:
        //env.setStateBackend(new FsStateBackend("file:///Users/temp/cp/"));

        // ---- Kafka consumer configuration ----
        Properties ppt = new Properties();
        ppt.setProperty("bootstrap.servers", KafkaDic.Kafka_ADDRESS_COLLECTION);
        ppt.setProperty("group.id", KafkaDic.CONSUMER_GROUP_ID);
        ppt.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        ppt.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        FlinkKafkaConsumer<String> flinkKafkaConsumer =
                new FlinkKafkaConsumer<>(KafkaDic.CONSUMER_TOPIC, new SimpleStringSchema(), ppt);
        // Start from the latest offsets rather than the committed group offsets.
        flinkKafkaConsumer.setStartFromLatest();

        // Typed source stream (the original used a raw DataStreamSource, losing type safety).
        DataStreamSource<String> kafkaSource = env.addSource(flinkKafkaConsumer).setParallelism(1);

        kafkaSource
                .map(new MapFunction<String, String>() {
                    @Override
                    public String map(String value) throws Exception {
                        // Keep only the first comma-separated field of the record.
                        return value.split(",")[0];
                    }
                })
                .timeWindowAll(Time.seconds(10))
                .process(new ProcessAllWindowFunction<String, List<String>, TimeWindow>() {
                    @Override
                    public void process(Context context, Iterable<String> elements,
                                        Collector<List<String>> out) throws Exception {
                        ArrayList<String> batch = Lists.newArrayList(elements);
                        // Emit only non-empty windows so the sink never receives an empty batch.
                        if (!batch.isEmpty()) {
                            out.collect(batch);
                        }
                    }
                })
                .addSink(new MysqlSink())
                .setParallelism(1);

        env.execute("MysqlSink");
    }
}
Kafka生产者代码
package cn.nanxiuzi.kafka;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Properties;

/**
 * Test-data generator: publishes 10,000 comma-separated demo records
 * (name, address, height, weight, phone — all derived from the loop
 * index) to {@code KafkaDic.PRODUCER_TOPIC}, one record per second.
 */
public class MyKafkaProducer {

    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", KafkaDic.Kafka_ADDRESS_COLLECTION);
        props.put("acks", "all");               // wait for the full ISR to acknowledge each record
        props.put("retries", 0);
        props.put("batch.size", 16384);
        props.put("linger.ms", 1);
        props.put("buffer.memory", 33554432);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        // try-with-resources guarantees the producer is flushed and closed even
        // if send() throws (the original leaked the producer on failure).
        try (KafkaProducer<String, String> kafkaProducer = new KafkaProducer<>(props)) {
            for (int i = 0; i < 10000; i++) {
                String messageContext = String.format(
                        "姓名%s,广东深圳%s,身高%s,体重%s,电话%s",
                        Integer.toString(i), Integer.toString(i), Integer.toString(i),
                        Integer.toString(i), Integer.toString(i));
                kafkaProducer.send(new ProducerRecord<>(KafkaDic.PRODUCER_TOPIC,
                        Integer.toString(i), messageContext));
                System.out.println("sent:" + messageContext);
                try {
                    // Throttle to roughly one record per second.
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    // Restore the interrupt flag and stop producing instead of
                    // swallowing the interruption (the original only printed it).
                    Thread.currentThread().interrupt();
                    break;
                }
            }
        }
    }
}
DBConnectUtil
package cn.nanxiuzi.kafka.kafka2mysql;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

/**
 * Small JDBC helper used by the MySQL sink: obtains connections and
 * wraps commit / rollback / close with logging. {@code commit} and
 * {@code rollback} also close the connection when they finish.
 */
public class DBConnectUtil {

    private static final Logger log = LoggerFactory.getLogger(DBConnectUtil.class);

    /**
     * Opens a JDBC connection.
     *
     * @param url      JDBC url of the target database
     * @param user     database user
     * @param password database password
     * @return an open connection (never {@code null})
     * @throws SQLException if the connection cannot be established
     */
    public static Connection getConnection(String url, String user, String password) throws SQLException {
        try {
            // Optional with JDBC 4+ drivers (SPI auto-loading); kept for old driver jars.
            Class.forName("com.mysql.jdbc.Driver");
        } catch (ClassNotFoundException e) {
            log.error("获取mysql.jdbc.Driver失败", e);
        }
        try {
            Connection conn = DriverManager.getConnection(url, user, password);
            log.info("获取连接:{} 成功...", conn);
            return conn;
        } catch (SQLException e) {
            // Propagate the failure instead of returning null (the original
            // swallowed it and returned null, causing NPEs at the caller).
            log.error("获取连接失败,url:" + url + ",user:" + user, e);
            throw e;
        }
    }

    /**
     * Commits the current transaction and then closes the connection.
     * A {@code null} connection is ignored.
     */
    public static void commit(Connection conn) {
        if (conn != null) {
            try {
                conn.commit();
            } catch (SQLException e) {
                // Log with the cause attached instead of printStackTrace().
                log.error("提交事物失败,Connection:" + conn, e);
            } finally {
                close(conn);
            }
        }
    }

    /**
     * Rolls back the current transaction and then closes the connection.
     * A {@code null} connection is ignored.
     */
    public static void rollback(Connection conn) {
        if (conn != null) {
            try {
                conn.rollback();
            } catch (SQLException e) {
                log.error("事物回滚失败,Connection:" + conn, e);
            } finally {
                close(conn);
            }
        }
    }

    /**
     * Closes the connection, logging (but not propagating) any failure.
     * A {@code null} connection is ignored.
     */
    public static void close(Connection conn) {
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                log.error("关闭连接失败,Connection:" + conn, e);
            }
        }
    }
}
输出
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>cn.nanxiuzi</groupId>
    <artifactId>myflink</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <flink-version>1.11.2</flink-version>
        <log4j-version>1.2.17</log4j-version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink-version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-core</artifactId>
            <version>${flink-version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>${flink-version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_2.11</artifactId>
            <version>${flink-version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-hbase_2.11</artifactId>
            <version>1.10.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-filesystem_2.11</artifactId>
            <version>${flink-version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-jdbc_2.11</artifactId>
            <version>${flink-version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.11</artifactId>
            <version>${flink-version}</version>
        </dependency>
        <dependency>
            <groupId>com.github.javafaker</groupId>
            <artifactId>javafaker</artifactId>
            <version>1.0.2</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>${log4j-version}</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>2.11.2</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.73</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.25</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.1.1</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <artifactSet>
                                <excludes>
                                    <exclude>com.google.code.findbugs:jsr305</exclude>
                                    <exclude>org.slf4j:*</exclude>
                                    <exclude>log4j:*</exclude>
                                </excludes>
                            </artifactSet>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>my.programs.main.clazz</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>