Maven
<properties>
    <!-- Source/report encodings -->
    <encoding>UTF-8</encoding>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>

    <!-- Java language level used by the compiler -->
    <java.version>1.8</java.version>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>

    <!-- Library versions referenced by the dependencies via ${...} -->
    <scala.version>2.12.10</scala.version>
    <spark.version>3.0.0</spark.version>
</properties>
<dependencies>
    <!-- Scala runtime library; version comes from the scala.version property -->
    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-library</artifactId>
        <version>${scala.version}</version>
    </dependency>

    <!-- Spark modules (Scala 2.12 builds); all share the spark.version property -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.12</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.12</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming_2.12</artifactId>
        <version>${spark.version}</version>
    </dependency>

    <!-- MySQL JDBC driver -->
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.49</version>
    </dependency>
</dependencies>
Java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import java.util.HashMap;
import java.util.Map;
/**
 * Copies the MySQL table {@code t_student} to HDFS as Parquet files using
 * Spark's JDBC data source.
 *
 * <p>The JDBC URL, credentials, source table and HDFS target path are
 * hard-coded in {@link #main(String[])}.
 */
public class SparkMySQLToHdfs {

    /**
     * Builds a SparkSession, loads the table over JDBC, prints it, and appends
     * the rows to the Parquet output directory on HDFS.
     *
     * <p>The session is stopped in a {@code finally} block so that it is
     * released even when the read or the write fails.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // User whose permissions are used when writing to HDFS.
        System.setProperty("HADOOP_USER_NAME", "root");

        SparkSession spark = SparkSession
                .builder()
                .appName("SparkMySQLToHdfs ")
                .master("local[2]") // comment out in production
                .config("spark.sql.warehouse.dir", "file:///C:/GitCoder/spark/file") // comment out in production
                .getOrCreate();

        try {
            String url = "jdbc:mysql://127.0.0.1:3306/shop_db?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC";

            // Options for Spark's "jdbc" data source.
            Map<String, String> options = new HashMap<>();
            options.put("driver", "com.mysql.jdbc.Driver");
            options.put("url", url);
            options.put("user", "root");
            options.put("password", "xx");
            options.put("dbtable", "t_student");

            Dataset<Row> dataset = spark.read().format("jdbc").options(options).load();

            // Print rows to stdout as a quick sanity check before writing.
            dataset.show();

            // Append mode: existing files under the target path are kept.
            dataset.write()
                    .mode(SaveMode.Append)
                    .format("parquet")
                    .save("hdfs://192.168.0.xxx:9000/user/root/data/mysql/student_03");
        } finally {
            // Always release the session, including on failure paths.
            spark.stop();
        }
    }
}