1 添加依赖
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.12</artifactId>
<version>2.4.3</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.12</artifactId>
<version>2.4.3</version>
</dependency>
<dependency>
<groupId>c3p0</groupId>
<artifactId>c3p0</artifactId>
<version>0.9.1.2</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.38</version>
</dependency>
2 连接池相关的代码
package cn.taobao;
import com.mchange.v2.c3p0.ComboPooledDataSource;
import java.beans.PropertyVetoException;
import java.sql.Connection;
import java.sql.SQLException;
/**
 * Process-wide c3p0 connection pool for the local "taobao" MySQL database.
 *
 * <p>All configuration happens once in the static initializer; callers borrow
 * with {@link #getConnection()} and release with {@link #returnConnection}.
 */
public class ConnectionPool {
    // Shared c3p0 data source; configured exactly once below.
    private static ComboPooledDataSource dataSource = new ComboPooledDataSource();

    static {
        try {
            // JDBC URL flags keep legacy datetime handling predictable and
            // allow multi-statement queries against the "taobao" schema.
            dataSource.setJdbcUrl("jdbc:mysql://localhost:3306/taobao?useUnicode=true&allowMultiQueries=true&useJDBCCompliantTimezoneShift=true&useLegacyDatetimeCode=false&serverTimezone=UTC&tinyInt1isBit=false");
            dataSource.setDriverClass("com.mysql.jdbc.Driver");
            dataSource.setUser("root");        // database user name
            dataSource.setPassword("123456");  // database password
            dataSource.setMaxPoolSize(40);     // maximum pooled connections
            dataSource.setMinPoolSize(2);      // minimum pooled connections
            dataSource.setInitialPoolSize(10); // connections created at startup
            dataSource.setMaxStatements(100);  // maximum cached Statements
        } catch (PropertyVetoException e) {
            // FIX: the original only printed the stack trace, leaving a
            // half-configured pool behind. A misconfigured pool is
            // unrecoverable, so fail class initialization fast.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Borrows a connection from the pool.
     *
     * @return a live JDBC connection, never {@code null}
     * @throws RuntimeException if the pool cannot supply a connection.
     *         (FIX: the original swallowed the SQLException and returned
     *         {@code null}, pushing a NullPointerException onto every caller —
     *         e.g. the Spark job calls setAutoCommit without a null check.)
     */
    public static Connection getConnection() {
        try {
            return dataSource.getConnection();
        } catch (SQLException e) {
            throw new RuntimeException("Failed to obtain connection from pool", e);
        }
    }

    /**
     * Returns a connection to the pool. c3p0 proxies {@code close()}, so this
     * releases the connection back to the pool rather than closing the
     * physical connection.
     *
     * @param connection the connection to release; {@code null} is ignored
     */
    public static void returnConnection(Connection connection) {
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                // Best-effort release: a failed close should not propagate.
                e.printStackTrace();
            }
        }
    }
}
3 sparkstreaming核心类
package cn.taobao;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.StorageLevels;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.api.java.function.VoidFunction2;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.Time;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import scala.Tuple2;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.util.Arrays;
import java.util.Iterator;
/**
 * Spark Streaming word-count job that reads whitespace-separated words from a
 * socket, counts them per 1-second micro-batch, and persists the counts into
 * the MySQL table {@code wordcount(word, count, ts)} via the c3p0 pool.
 */
public class Save_mysql {
    public static void main(String[] args) throws Exception {
        // StreamingContext entry point: local master, 1-second batch interval.
        JavaStreamingContext ssc = new JavaStreamingContext(
                "local[*]",
                "Save_1",
                Durations.seconds(1),
                System.getenv("SPARK_HOME"),
                // FIX: the original passed Save_mysql.class.getClass(), which
                // evaluates to java.lang.Class.class — so the jar containing
                // THIS application class was never located. jarOfClass expects
                // the application class itself.
                JavaStreamingContext.jarOfClass(Save_mysql.class));
        ssc.sparkContext().setLogLevel("ERROR");

        // Receiver: one DStream of raw text lines from host:port over a socket.
        JavaReceiverInputDStream<String> lines = ssc.socketTextStream(
                "158.158.4.49", 9998, StorageLevels.MEMORY_AND_DISK_SER);

        // "aa bb cc aa" -> aa, bb, cc, aa (one element per word)
        JavaDStream<String> flatMapDStream = lines.flatMap(xx -> {
            String[] str_split = xx.split(" ");
            return Arrays.asList(str_split).iterator();
        });

        // word -> (word, 1)
        JavaPairDStream<String, Integer> mapToPairDStream =
                flatMapDStream.mapToPair(xx -> new Tuple2<>(xx, 1));

        // (word, 1)* -> (word, n) within each batch
        JavaPairDStream<String, Integer> reduceByKeyDStream =
                mapToPairDStream.reduceByKey((Integer v1, Integer v2) -> v1 + v2);

        // Persist each batch: one pooled connection per RDD partition, rows
        // inserted in JDBC batches of 500 with explicit commits.
        reduceByKeyDStream.foreachRDD(new VoidFunction2<JavaPairRDD<String, Integer>, Time>() {
            @Override
            public void call(JavaPairRDD<String, Integer> rdd, Time time) throws Exception {
                rdd.foreachPartition(new VoidFunction<Iterator<Tuple2<String, Integer>>>() {
                    @Override
                    public void call(Iterator<Tuple2<String, Integer>> partitionRecords) throws Exception {
                        Connection conn = ConnectionPool.getConnection();
                        // FIX: the original leaked the connection whenever any
                        // JDBC call threw; always return it to the pool.
                        try {
                            conn.setAutoCommit(false);
                            try (PreparedStatement preStatement = conn.prepareStatement(
                                    "insert into wordcount(word,count,ts) values(?,?,?)")) {
                                int count = 0;
                                while (partitionRecords.hasNext()) {
                                    count++;
                                    Tuple2<String, Integer> nex = partitionRecords.next();
                                    preStatement.setString(1, nex._1());
                                    preStatement.setInt(2, nex._2());
                                    preStatement.setLong(3, time.milliseconds());
                                    preStatement.addBatch();
                                    // Flush every 500 rows to bound driver-side
                                    // batch memory. (count starts at 1, so the
                                    // original "count != 0 &&" guard was redundant.)
                                    if (count % 500 == 0) {
                                        preStatement.executeBatch();
                                        conn.commit();
                                    }
                                }
                                // Flush the final partial batch (< 500 rows).
                                preStatement.executeBatch();
                                conn.commit();
                            }
                            conn.setAutoCommit(true);
                        } finally {
                            ConnectionPool.returnConnection(conn);
                        }
                    }
                });
            }
        });

        // Start receiving data explicitly.
        ssc.start();
        try {
            // Block until the streaming computation terminates.
            ssc.awaitTermination();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            ssc.close();
        }
    }
}
4 效果展示