Getting Started with Spark Streaming - Using the foreachRDD Operator - Saving Results to MySQL

1 Add dependencies

<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.12</artifactId>
    <version>2.4.3</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.12</artifactId>
    <version>2.4.3</version>
</dependency>
<dependency>
    <groupId>c3p0</groupId>
    <artifactId>c3p0</artifactId>
    <version>0.9.1.2</version>
</dependency>
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.38</version>
</dependency>

2 Connection pool code

package cn.taobao;
import com.mchange.v2.c3p0.ComboPooledDataSource;
import java.beans.PropertyVetoException;
import java.sql.Connection;
import java.sql.SQLException;

public class ConnectionPool {
    private static ComboPooledDataSource dataSource = new ComboPooledDataSource();

    static {
        try {
            dataSource.setJdbcUrl("jdbc:mysql://localhost:3306/taobao?useUnicode=true&allowMultiQueries=true&useJDBCCompliantTimezoneShift=true&useLegacyDatetimeCode=false&serverTimezone=UTC&tinyInt1isBit=false");
            dataSource.setDriverClass("com.mysql.jdbc.Driver");
            dataSource.setUser("root");            // user name for the database connection
            dataSource.setPassword("123456");      // password for the database connection
            dataSource.setMaxPoolSize(40);         // maximum number of connections in the pool
            dataSource.setMinPoolSize(2);          // minimum number of connections in the pool
            dataSource.setInitialPoolSize(10);     // initial number of connections in the pool
            dataSource.setMaxStatements(100);      // maximum number of cached Statements
        } catch (PropertyVetoException e) {
            e.printStackTrace();
        }
    }

    public static Connection getConnection() {
        try {
            return dataSource.getConnection();
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return null;
    }

    public static void returnConnection(Connection connection) {
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}
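
For reference, a minimal usage sketch of the pool: take a connection, use it, and always return it in a finally block so that a failed statement does not leak it. ConnectionPoolCheck is just an illustrative helper and SELECT 1 a placeholder query, not part of the original code.

package cn.taobao;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;

public class ConnectionPoolCheck {
    public static void main(String[] args) throws Exception {
        Connection conn = ConnectionPool.getConnection();
        try {
            // run a trivial query to verify the pool and the MySQL connection work
            Statement stmt = conn.createStatement();
            ResultSet rs = stmt.executeQuery("SELECT 1");
            while (rs.next()) {
                System.out.println("pool ok: " + rs.getInt(1));
            }
            rs.close();
            stmt.close();
        } finally {
            // hand the connection back to the pool even if the query failed
            ConnectionPool.returnConnection(conn);
        }
    }
}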

3 Spark Streaming core class

package cn.taobao;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.StorageLevels;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.api.java.function.VoidFunction2;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.Time;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import scala.Tuple2;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.util.Arrays;
import java.util.Iterator;

public class Save_mysql {
    public static void main(String[] args) throws Exception {
        // StreamingContext: the programming entry point
        JavaStreamingContext ssc = new JavaStreamingContext(
                "local[*]",
                "Save_1",
                Durations.seconds(1),
                System.getenv("SPARK_HOME"),
                JavaStreamingContext.jarOfClass(Save_mysql.class));

        ssc.sparkContext().setLogLevel("ERROR");

        // Data receiver (Receiver)
        // Create a receiver (JavaReceiverInputDStream) that receives data sent to a port
        // on a machine over a socket and processes it
        JavaReceiverInputDStream<String> lines = ssc.socketTextStream(
                "158.158.4.49", 9998, StorageLevels.MEMORY_AND_DISK_SER);

        /*
        Suppose the input is: aa bb cc aa
         */

        /*
        Returns: aa
                 bb
                 cc
                 aa
         */
        JavaDStream<String> flatMapDStream = lines.flatMap(xx -> {
            String[] str_split = xx.split(" ");
            return Arrays.asList(str_split).iterator();
        });

        /*
        Returns: (aa,1)
                 (bb,1)
                 (cc,1)
                 (aa,1)
         */
        JavaPairDStream<String, Integer> mapToPairDStream = flatMapDStream.mapToPair(xx -> {
            return new Tuple2<>(xx, 1);
        });

        /*
        Returns: (aa,2)
                 (bb,1)
                 (cc,1)
         */
        JavaPairDStream<String, Integer> reduceByKeyDStream = mapToPairDStream.reduceByKey((Integer v1, Integer v2) ->
        {
            return v1 + v2;
        });

        reduceByKeyDStream.foreachRDD(new VoidFunction2<JavaPairRDD<String, Integer>, Time>() {
            @Override
            public void call(JavaPairRDD<String, Integer> rdd, Time time) throws Exception {
                rdd.foreachPartition(new VoidFunction<Iterator<Tuple2<String, Integer>>>() {
                    @Override
                    public void call(Iterator<Tuple2<String, Integer>> partitionRecords) throws Exception {

                        Connection conn = ConnectionPool.getConnection();
                        conn.setAutoCommit(false);

                        PreparedStatement preStatement = conn.prepareStatement("insert into wordcount(word,count,ts) values(?,?,?)");
                        int count = 0;

                        while (partitionRecords.hasNext()) {
                            count++;
                            Tuple2<String, Integer> nex = partitionRecords.next();

                            preStatement.setString(1, nex._1());
                            preStatement.setInt(2, nex._2());
                            preStatement.setLong(3, time.milliseconds());

                            preStatement.addBatch();
                            // flush and commit every 500 records to keep each batch small
                            if (count % 500 == 0) {
                                preStatement.executeBatch();
                                conn.commit();
                            }
                        }

                        preStatement.executeBatch();
                        preStatement.close();

                        conn.commit();
                        conn.setAutoCommit(true);
                        ConnectionPool.returnConnection(conn);
                    }
                });
            }
        });

        // Explicitly start receiving data
        ssc.start();
        try {
            // Wait for the computation to finish
            ssc.awaitTermination();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            ssc.close();
        }
    }
}
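
The insert statement above assumes a wordcount table with columns word, count and ts already exists. The original schema is not shown; based on the setString/setInt/setLong calls, a table like the one created by this hypothetical helper should work (column types are an assumption):

package cn.taobao;
import java.sql.Connection;
import java.sql.Statement;

public class CreateWordcountTable {
    public static void main(String[] args) throws Exception {
        Connection conn = ConnectionPool.getConnection();
        try {
            Statement stmt = conn.createStatement();
            // word -> setString(1, ...), count -> setInt(2, ...), ts (batch time in ms) -> setLong(3, ...)
            stmt.executeUpdate(
                    "CREATE TABLE IF NOT EXISTS wordcount (" +
                    " word VARCHAR(100) NOT NULL," +
                    " count INT NOT NULL," +
                    " ts BIGINT NOT NULL" +
                    ")");
            stmt.close();
        } finally {
            ConnectionPool.returnConnection(conn);
        }
    }
}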

4 Results
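
Once words are sent to port 9998 (for example from a netcat session), each batch writes its word counts into the wordcount table together with the batch timestamp in ts. Assuming the schema sketched above, a small JDBC query is enough to see the effect; ShowWordcount is an illustrative helper, not part of the original code.

package cn.taobao;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;

public class ShowWordcount {
    public static void main(String[] args) throws Exception {
        Connection conn = ConnectionPool.getConnection();
        try {
            Statement stmt = conn.createStatement();
            // show the most recently written batches first
            ResultSet rs = stmt.executeQuery(
                    "SELECT word, count, ts FROM wordcount ORDER BY ts DESC LIMIT 20");
            while (rs.next()) {
                System.out.println(rs.getString("word") + "\t"
                        + rs.getInt("count") + "\t" + rs.getLong("ts"));
            }
            rs.close();
            stmt.close();
        } finally {
            ConnectionPool.returnConnection(conn);
        }
    }
}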
