package org.study.gao.rdd;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.rdd.JdbcRDD;
import scala.Serializable;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.List;
/**
 * Demo: reading rows from a MySQL table into a Spark RDD via {@link JdbcRDD#create}.
 *
 * <p>Runs in local mode, partitions the query {@code id >= ? AND id < ?} over the
 * range [1, 500) across 2 partitions, collects the rows as {@link MyData} and prints them.
 */
public class JdbcRddDemo {

    public static void main(String[] args) {
        // Step 1: Spark configuration — local mode for this demo.
        SparkConf conf = new SparkConf()
                .setAppName("JdbcRddDemo")
                .setMaster("local");

        // Step 2: JavaSparkContext is the entry point to all Spark functionality.
        // It implements Closeable (close() delegates to stop()), so try-with-resources
        // guarantees the context is stopped even if an action below throws.
        try (JavaSparkContext context = new JavaSparkContext(conf)) {
            // Step 3: build a lazy RDD. No data is read here; the query runs on the
            // executors only when an action (collect) is triggered.
            JavaRDD<MyData> javaRDD = JdbcRDD.create(
                    context,
                    () -> {
                        // NOTE(review): credentials are hard-coded for the demo;
                        // externalize them (env vars / config) in real code.
                        String url = "jdbc:mysql://192.168.0.1:3306/scheme_dev?useUnicode=true&characterEncoding=UTF-8&serverTimezone=Asia/Shanghai&useSSL=false&allowPublicKeyRetrieval=true&allowMultiQueries=true&rewriteBatchedStatements=true&nullCatalogMeansCurrent=true";
                        String userName = "root";
                        String password = "123456";
                        try {
                            // JDBC 4+ auto-loads the driver, but the explicit load keeps
                            // shaded/executor classpath setups working predictably.
                            Class.forName("com.mysql.cj.jdbc.Driver");
                            return DriverManager.getConnection(url, userName, password);
                        } catch (ClassNotFoundException | SQLException e) {
                            // Fail fast with the cause preserved. Returning null here
                            // (as before) only defers the failure to an opaque NPE
                            // inside JdbcRDD on the executor.
                            throw new IllegalStateException("Could not open MySQL connection", e);
                        }
                    },
                    // The two '?' placeholders are filled from the lower/upper bound args below.
                    "SELECT * FROM form_item WHERE id >= ? AND id < ?",
                    1,   // lower bound, substituted into the first '?'
                    500, // upper bound, substituted into the second '?'
                    2,   // number of partitions the id range is split into
                    rs -> {
                        // Map one ResultSet row to MyData.
                        try {
                            int id = rs.getInt("id");
                            String name = rs.getString("title");
                            return new MyData(id, name);
                        } catch (SQLException e) {
                            // Do NOT substitute a default row (old behavior silently
                            // corrupted the result set) — propagate with the cause.
                            throw new IllegalStateException("Failed to map result-set row", e);
                        }
                    }
            );

            // collect() is the action: only now does the query actually execute.
            List<MyData> collect = javaRDD.collect();
            collect.forEach(System.out::println);
        }
    }

    /**
     * Row holder shipped between Spark executors and the driver; must be serializable.
     * Implements java.io.Serializable (fully qualified: the file imports
     * scala.Serializable, which is deprecated in Scala 2.13 — the JDK interface is
     * what Spark's Java serialization actually requires).
     */
    static class MyData implements java.io.Serializable {
        private static final long serialVersionUID = 1L;

        int id;
        String name;

        /** No-arg constructor kept for backward compatibility (placeholder values). */
        public MyData() {
            this.id = 1;
            this.name = "name";
        }

        public MyData(int id, String name) {
            this.id = id;
            this.name = name;
        }

        @Override
        public String toString() {
            return "MyData{" +
                    "id=" + id +
                    ", name='" + name + '\'' +
                    '}';
        }
    }
}
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.study</groupId>
<artifactId>spark-learn</artifactId>
<version>0.0.1</version>
<!-- Spark version, shared by all Spark artifacts below -->
<properties>
<spark.version>3.5.0</spark.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.12</artifactId>
<version>${spark.version}</version>
</dependency>
<!-- Logging: Log4j 2 binding for SLF4J 2 -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j2-impl</artifactId>
<version>2.20.0</version>
</dependency>
<!-- MySQL JDBC driver (note: the artifact was later relocated to com.mysql:mysql-connector-j) -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.18</version>
</dependency>
</dependencies>
</project>