Spark:通过Demo来入门Spark

最新推荐文章于 2024-04-06 22:39:32 发布

huo_火力全开

最新推荐文章于 2024-04-06 22:39:32 发布

阅读量381

点赞数

分类专栏： Spark 文章标签： spark hadoop

本文链接：https://blog.csdn.net/huonan_123/article/details/83857210

版权

Spark 专栏收录该内容

14 篇文章 0 订阅

订阅专栏

RDD

统计 worldCount
文件 wc.txt

[hadoop@hadoop001 spark-test-data]$ cat wc.txt 
hello,world,hello
hello,world
welcome

scala> val file = sc.textFile("file:///home/hadoop/data/spark-test-data/wc.txt")
file: org.apache.spark.rdd.RDD[String] = file:///home/hadoop/data/spark-test-data/wc.txt MapPartitionsRDD[13] at textFile at <console>:24

scala> val words=file.flatMap(lines=>lines.split(","))
words: org.apache.spark.rdd.RDD[String] = MapPartitionsRDD[14] at flatMap at <console>:30
scala> val wordsM=words.map(word=>(word,1))
wordsM: org.apache.spark.rdd.RDD[(String, Int)] = MapPartitionsRDD[15] at map at <console>:32

scala> val wordsc=wordsM.reduceByKey((x,y)=>x+y)
wordsc: org.apache.spark.rdd.RDD[(String, Int)] = ShuffledRDD[16] at reduceByKey at <console>:34

scala> wordsc.collect
res5: Array[(String, Int)] = Array((hello,3), (welcome,1), (world,2))

DataFrame

文件 test2.json

[hadoop@hadoop001 spark-test-data]$ cat test2.json 
{"name":"Yin", "age":18,"address":{"city":"Columbus","state":"Ohio"}}
{"name":"Michael","age":16, "address":{"city":null, "state":"California"}}
[hadoop@hadoop001 spark-test-data]$

scala> val testDf=spark.read.format("json").load("/home/hadoop/data/spark-test-data/test2.json")
testDf: org.apache.spark.sql.DataFrame = [address: struct<city: string, state: string>, age: bigint ... 1 more field]

scala> testDf.printSchema
root
 |-- address: struct (nullable = true)
 |    |-- city: string (nullable = true)
 |    |-- state: string (nullable = true)
 |-- age: long (nullable = true)
 |-- name: string (nullable = true)


scala> testDf.show
+-----------------+---+-------+
|          address|age|   name|
+-----------------+---+-------+
|  [Columbus,Ohio]| 18|    Yin|
|[null,California]| 16|Michael|
+-----------------+---+-------+


scala> testDf.select("name")
res17: org.apache.spark.sql.DataFrame = [name: string]

scala> testDf.select("name").show
+-------+
|   name|
+-------+
|    Yin|
|Michael|
+-------+

Running SQL Queries Programmatically

在程序上使用sql执行


scala> testDf.createOrReplaceTempView("people")

scala> val sqlDF = spark.sql("SELECT * FROM people")
sqlDF: org.apache.spark.sql.DataFrame = [address: struct<city: string, state: string>, age: bigint ... 1 more field]

scala> sqlDF.show
+-----------------+---+-------+
|          address|age|   name|
+-----------------+---+-------+
|  [Columbus,Ohio]| 18|    Yin|
|[null,California]| 16|Michael|
+-----------------+---+-------+


scala>

RDD=>DataFrame 反射的方式

数据:student.data

[hadoop@hadoop001 spark-test-data]$ cat student.data 
1|Burke|1-300-746-8446|ullamcorper.velit.in@ametnullaDonec.co.uk
2|Kamal|1-668-571-5046|pede.Suspendisse@interdumenim.edu
3|Olga|1-956-311-1686|Aenean.eget.metus@dictumcursusNunc.edu
4|Belle|1-246-894-6340|vitae.aliquet.nec@neque.co.uk
5|Trevor|1-300-527-4967|dapibus.id@acturpisegestas.net
6|Laurel|1-691-379-9921|adipiscing@consectetueripsum.edu
7|Sara|1-608-140-1995|Donec.nibh@enimEtiamimperdiet.edu
8|Kaseem|1-881-586-2689|cursus.et.magna@euismod.org
9|Lev|1-916-367-5608|Vivamus.nisi@ipsumdolor.com
10|Maya|1-271-683-2698|accumsan.convallis@ornarelectusjusto.edu
11|Emi|1-467-270-1337|est@nunc.com
12|Caleb|1-683-212-0896|Suspendisse@Quisque.edu
13|Florence|1-603-575-2444|sit.amet.dapibus@lacusAliquamrutrum.ca
14|Anika|1-856-828-7883|euismod@ligulaelit.co.uk
15|Tarik|1-398-171-2268|turpis@felisorci.com
16|Amena|1-878-250-3129|lorem.luctus.ut@scelerisque.com
17|Blossom|1-154-406-9596|Nunc.commodo.auctor@eratSed.co.uk
18|Guy|1-869-521-3230|senectus.et.netus@lectusrutrum.com
19|Malachi|1-608-637-2772|Proin.mi.Aliquam@estarcu.net
20|Edward|1-711-710-6552|lectus@aliquetlibero.co.uk
21||1-711-710-6552|lectus@aliquetlibero.co.uk
22||1-711-710-6552|lectus@aliquetlibero.co.uk
23|NULL|1-711-710-6552|lectus@aliquetlibero.co.uk

第一次(val studentDs=rdd.map(lines=>lines.split("|")))

scala> val rdd = spark.sparkContext.textFile("file:home/hadoop/data/spark-test-data/student.data")
rdd: org.apache.spark.rdd.RDD[String] = file:home/hadoop/data/spark-test-data/student.data MapPartitionsRDD[72] at textFile at <console>:23

scala> val studentDs=rdd.map(lines=>lines.split("|"))
studentDs: org.apache.spark.sql.Dataset[Array[String]] = [value: array<string>]

scala> case class Student(id:Int,name:String,phone:String,emal:String)
defined class Student

scala> val studentDfF = studentDs.map(info=>Student(info(0).toInt,info(1),info(2),info(3))).toDF()
studentDfF: org.apache.spark.sql.DataFrame = [id: int, name: string ... 2 more fields]

scala> studentDfF.printSchema
root
 |-- id: integer (nullable = true)
 |-- name: string (nullable = true)
 |-- phone: string (nullable = true)
 |-- emal: string (nullable = true)


scala> studentDfF.show
+---+----+-----+----+
| id|name|phone|emal|
+---+----+-----+----+
|  1|   ||    B|   u|
|  2|   ||    K|   a|
|  3|   ||    O|   l|
|  4|   ||    B|   e|
|  5|   ||    T|   r|
|  6|   ||    L|   a|
|  7|   ||    S|   a|
|  8|   ||    K|   a|
|  9|   ||    L|   e|
|  1|   0|    ||   M|
|  1|   1|    ||   E|
|  1|   2|    ||   C|
|  1|   3|    ||   F|
|  1|   4|    ||   A|
|  1|   5|    ||   T|
|  1|   6|    ||   A|
|  1|   7|    ||   B|
|  1|   8|    ||   G|
|  1|   9|    ||   M|
|  2|   0|    ||   E|
+---+----+-----+----+
only showing top 20 rows

第二次(rdd.map(lines=>lines.split("\|")))

scala> val studentDs=rdd.map(lines=>lines.split("\\|"))
studentDs: org.apache.spark.rdd.RDD[Array[String]] = MapPartitionsRDD[10] at map at <console>:28

scala> val studentDF = studentDs.map(info=>Student(info(0).toInt,info(1),info(2),info(3))).toDF()
studentDF: org.apache.spark.sql.DataFrame = [id: int, name: string ... 2 more fields]

scala> studentDF.show
+---+--------+--------------+--------------------+
| id|    name|         phone|                emal|
+---+--------+--------------+--------------------+
|  1|   Burke|1-300-746-8446|ullamcorper.velit...|
|  2|   Kamal|1-668-571-5046|pede.Suspendisse@...|
|  3|    Olga|1-956-311-1686|Aenean.eget.metus...|
|  4|   Belle|1-246-894-6340|vitae.aliquet.nec...|
|  5|  Trevor|1-300-527-4967|dapibus.id@acturp...|
|  6|  Laurel|1-691-379-9921|adipiscing@consec...|
|  7|    Sara|1-608-140-1995|Donec.nibh@enimEt...|
|  8|  Kaseem|1-881-586-2689|cursus.et.magna@e...|
|  9|     Lev|1-916-367-5608|Vivamus.nisi@ipsu...|
| 10|    Maya|1-271-683-2698|accumsan.convalli...|
| 11|     Emi|1-467-270-1337|        est@nunc.com|
| 12|   Caleb|1-683-212-0896|Suspendisse@Quisq...|
| 13|Florence|1-603-575-2444|sit.amet.dapibus@...|
| 14|   Anika|1-856-828-7883|euismod@ligulaeli...|
| 15|   Tarik|1-398-171-2268|turpis@felisorci.com|
| 16|   Amena|1-878-250-3129|lorem.luctus.ut@s...|
| 17| Blossom|1-154-406-9596|Nunc.commodo.auct...|
| 18|     Guy|1-869-521-3230|senectus.et.netus...|
| 19| Malachi|1-608-637-2772|Proin.mi.Aliquam@...|
| 20|  Edward|1-711-710-6552|lectus@aliquetlib...|
+---+--------+--------------+--------------------+
only showing top 20 rows

RDD=>DataFrame 编程的方式

数据:people.txt

hadoop@hadoop001 spark-test-data]$ cat people.txt 
 Burke	,12
   Kamal	,13
    Olga	,14
   Belle	,15
  Trevor	,16
  Laurel	,17
    Sara	,18
  Kaseem	,19
     Lev	,20
    Maya	,21
     Emi	,22
   Caleb	,23
Florence	,24
   Anika	,25
   Tarik	,26
   Amena	,27
 Blossom	,28
     Guy	,29
 Malachi	,30
  Edward	,31

操作

scala> val rdd = spark.sparkContext.textFile("file:///home/hadoop/data/spark-test-data/people.txt")
rdd: org.apache.spark.rdd.RDD[String] = file:///home/hadoop/data/spark-test-data/people.txt MapPartitionsRDD[16] at textFile at <console>:26


scala> import org.apache.spark.sql.types._
import org.apache.spark.sql.types._

scala> import org.apache.spark.sql.types.{StringType, IntegerType, StructField, StructType}
import org.apache.spark.sql.types.{StringType, IntegerType, StructField, StructType}

scala> val rowRDD = rdd.map(_.split(",")).map(attributes => Row(attributes(0).trim, attributes(1).trim.toInt))
rowRDD: org.apache.spark.rdd.RDD[org.apache.spark.sql.Row] = MapPartitionsRDD[33] at map at <console>:33

scala> val schema = StructType(Array(StructField("name", StringType, true),
     |       StructField("age", IntegerType, true)))
schema: org.apache.spark.sql.types.StructType = StructType(StructField(name,StringType,true), StructField(age,IntegerType,true))

scala> val pepleRDD = spark.createDataFrame(rowRDD,schema)
pepleRDD: org.apache.spark.sql.DataFrame = [name: string, age: int]

scala> pepleRDD.show
+--------+---+
|    name|age|
+--------+---+
|   Burke| 12|
|   Kamal| 13|
|    Olga| 14|
|   Belle| 15|
|  Trevor| 16|
|  Laurel| 17|
|    Sara| 18|
|  Kaseem| 19|
|     Lev| 20|
|    Maya| 21|
|     Emi| 22|
|   Caleb| 23|
|Florence| 24|
|   Anika| 25|
|   Tarik| 26|
|   Amena| 27|
| Blossom| 28|
|     Guy| 29|
| Malachi| 30|
|  Edward| 31|
+--------+---+

paquet

文件:users.parquet

scala> val userDF=spark.read.load("/home/hadoop/data/spark-test-data/resources/users.parquet")
SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
userDF: org.apache.spark.sql.DataFrame = [name: string, favorite_color: string ... 1 more field]

scala> userDF.printSchema
root
 |-- name: string (nullable = true)
 |-- favorite_color: string (nullable = true)
 |-- favorite_numbers: array (nullable = true)
 |    |-- element: integer (containsNull = true)


scala> userDF.show
18/11/08 02:34:45 WARN ParquetRecordReader: Can not initialize counter due to context is not a instance of TaskInputOutputContext, but is org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
+------+--------------+----------------+
|  name|favorite_color|favorite_numbers|
+------+--------------+----------------+
|Alyssa|          null|  [3, 9, 15, 20]|
|   Ben|           red|              []|
+------+--------------+----------------+

cvs

scala> val salesDFCsv = spark.read.format("csv").option("sep", ",").option("inferSchema", "true").option("header", "true").load("/home/hadoop/data/spark-test-data/resources/sales.csv")
salesDFCsv: org.apache.spark.sql.DataFrame = [transactionId: int, customerId: int ... 2 more fields]

scala> salesDFCsv.show
+-------------+----------+------+----------+
|transactionId|customerId|itemId|amountPaid|
+-------------+----------+------+----------+
|          111|         1|     1|     100.0|
|          112|         2|     2|     505.0|
|          113|         3|     3|     510.0|
|          114|         4|     4|     600.0|
|          115|         1|     2|     500.0|
|          116|         1|     2|     500.0|
|          117|         1|     2|     500.0|
|          118|         1|     2|     500.0|
|          119|         2|     3|     500.0|
|          120|         1|     2|     500.0|
|          121|         1|     4|     500.0|
|          122|         1|     2|     500.0|
|          123|         1|     4|     500.0|
|          124|         1|     2|     500.0|
+-------------+----------+------+----------+

spark操作hive

操作前准备

spark-env.shl

在$SPARK_HOME/conf 下的spark-env.shl 添加

export SPARK_CLASSPATH=$HIVE_HOME/lib/mysql-connector-java-5.1.47.jar

复制 $HIVE_HOME/conf 下的

hive-site.xml 到$SPARK_HOME/conf

复制 $HIVE_HOME/conf 下的

读Hive
- 第一种方法

scala> val hiveDF=spark.table("emp_p")
hiveDF: org.apache.spark.sql.DataFrame = [empno: int, ename: string ... 6 more fields]

scala> hiveDF.show
SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
+-----+------+---------+----+----------+-------+------+------+
|empno| ename|      job| mgr|  hiredate|    sal|  comm|deptno|
+-----+------+---------+----+----------+-------+------+------+
| 7369| SMITH|    CLERK|7902|1980-12-17|  800.0|  null|    20|
| 7499| ALLEN| SALESMAN|7698| 1981-2-20| 1600.0| 300.0|    30|
| 7521|  WARD| SALESMAN|7698| 1981-2-22| 1250.0| 500.0|    30|
| 7566| JONES|  MANAGER|7839|  1981-4-2| 2975.0|  null|    20|
| 7654|MARTIN| SALESMAN|7698| 1981-9-28| 1250.0|1400.0|    30|
| 7698| BLAKE|  MANAGER|7839|  1981-5-1| 2850.0|  null|    30|
| 7782| CLARK|  MANAGER|7839|  1981-6-9| 2450.0|  null|    10|
| 7788| SCOTT|  ANALYST|7566| 1987-4-19| 3000.0|  null|    20|
| 7839|  KING|PRESIDENT|null|1981-11-17| 5000.0|  null|    10|
| 7844|TURNER| SALESMAN|7698|  1981-9-8| 1500.0|   0.0|    30|
| 7876| ADAMS|    CLERK|7788| 1987-5-23| 1100.0|  null|    20|
| 7900| JAMES|    CLERK|7698| 1981-12-3|  950.0|  null|    30|
| 7902|  FORD|  ANALYST|7566| 1981-12-3| 3000.0|  null|    20|
| 7934|MILLER|    CLERK|7782| 1982-1-23| 1300.0|  null|    10|
| 8888|  HIVE|  PROGRAM|7839| 1988-1-23|10300.0|  null|  null|
+-----+------+---------+----+----------+-------+------+------+

第二种方法

:spark.sql(“select * from emp_p”).show

scala> spark.sql("select * from emp_p").show
+-----+------+---------+----+----------+-------+------+------+
|empno| ename|      job| mgr|  hiredate|    sal|  comm|deptno|
+-----+------+---------+----+----------+-------+------+------+
| 7369| SMITH|    CLERK|7902|1980-12-17|  800.0|  null|    20|
| 7499| ALLEN| SALESMAN|7698| 1981-2-20| 1600.0| 300.0|    30|
| 7521|  WARD| SALESMAN|7698| 1981-2-22| 1250.0| 500.0|    30|
| 7566| JONES|  MANAGER|7839|  1981-4-2| 2975.0|  null|    20|
| 7654|MARTIN| SALESMAN|7698| 1981-9-28| 1250.0|1400.0|    30|
| 7698| BLAKE|  MANAGER|7839|  1981-5-1| 2850.0|  null|    30|
| 7782| CLARK|  MANAGER|7839|  1981-6-9| 2450.0|  null|    10|
| 7788| SCOTT|  ANALYST|7566| 1987-4-19| 3000.0|  null|    20|
| 7839|  KING|PRESIDENT|null|1981-11-17| 5000.0|  null|    10|
| 7844|TURNER| SALESMAN|7698|  1981-9-8| 1500.0|   0.0|    30|
| 7876| ADAMS|    CLERK|7788| 1987-5-23| 1100.0|  null|    20|
| 7900| JAMES|    CLERK|7698| 1981-12-3|  950.0|  null|    30|
| 7902|  FORD|  ANALYST|7566| 1981-12-3| 3000.0|  null|    20|
| 7934|MILLER|    CLERK|7782| 1982-1-23| 1300.0|  null|    10|
| 8888|  HIVE|  PROGRAM|7839| 1988-1-23|10300.0|  null|  null|
+-----+------+---------+----+----------+-------+------+------+

第三种方法


scala> sql("select * from emp_p").show
+-----+------+---------+----+----------+-------+------+------+
|empno| ename|      job| mgr|  hiredate|    sal|  comm|deptno|
+-----+------+---------+----+----------+-------+------+------+
| 7369| SMITH|    CLERK|7902|1980-12-17|  800.0|  null|    20|
| 7499| ALLEN| SALESMAN|7698| 1981-2-20| 1600.0| 300.0|    30|
| 7521|  WARD| SALESMAN|7698| 1981-2-22| 1250.0| 500.0|    30|
| 7566| JONES|  MANAGER|7839|  1981-4-2| 2975.0|  null|    20|
| 7654|MARTIN| SALESMAN|7698| 1981-9-28| 1250.0|1400.0|    30|
| 7698| BLAKE|  MANAGER|7839|  1981-5-1| 2850.0|  null|    30|
| 7782| CLARK|  MANAGER|7839|  1981-6-9| 2450.0|  null|    10|
| 7788| SCOTT|  ANALYST|7566| 1987-4-19| 3000.0|  null|    20|
| 7839|  KING|PRESIDENT|null|1981-11-17| 5000.0|  null|    10|
| 7844|TURNER| SALESMAN|7698|  1981-9-8| 1500.0|   0.0|    30|
| 7876| ADAMS|    CLERK|7788| 1987-5-23| 1100.0|  null|    20|
| 7900| JAMES|    CLERK|7698| 1981-12-3|  950.0|  null|    30|
| 7902|  FORD|  ANALYST|7566| 1981-12-3| 3000.0|  null|    20|
| 7934|MILLER|    CLERK|7782| 1982-1-23| 1300.0|  null|    10|
| 8888|  HIVE|  PROGRAM|7839| 1988-1-23|10300.0|  null|  null|
+-----+------+---------+----+----------+-------+------+------+

写Hive

注意:默认存储格式 parquet

scala> val usersPath = "file:///home/hadoop/data/spark-test-data/resources/users.parquet"
usersPath: String = file:///home/hadoop/data/spark-test-data/resources/users.parquet

scala> val usersDF = spark.read.load(usersPath)

scala> usersDF.write.format("parquet").saveAsTable("ruoze_d5.namesPartByColor3")
18/11/08 07:53:25 WARN ParquetRecordReader: Can not initialize counter due to context is not a instance of TaskInputOutputContext, but is org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl

hive (ruoze_d5)> select * from namespartbycolor3;
OK
namespartbycolor3.name	namespartbycolor3.favorite_color	namespartbycolor3.favorite_numbers
Alyssa	NULL	[3,9,15,20]
Ben	red	NULL
Time taken: 0.771 seconds, Fetched: 2 row(s)

注意

spark写入hive的时候要写入的数据库要与hive-site.xml 里配置的一致,否则即使在写入完成后,在hive里有表,但是表里没有数据

写入分区

scala> peopleDF.write.partitionBy("age").saveAsTable("ruoze_d5.spark_people")
[hadoop@hadoop001 ~]$ hadoop fs -ls /user/hive/warehouse/ruoze_d5.db/spark_people
18/11/08 10:51:13 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Found 4 items
-rw-r--r--   3 hadoop supergroup          0 2018-11-08 10:49 /user/hive/warehouse/ruoze_d5.db/spark_people/_SUCCESS
drwxr-xr-x   - hadoop supergroup          0 2018-11-08 10:48 /user/hive/warehouse/ruoze_d5.db/spark_people/age=19
drwxr-xr-x   - hadoop supergroup          0 2018-11-08 10:49 /user/hive/warehouse/ruoze_d5.db/spark_people/age=30
drwxr-xr-x   - hadoop supergroup          0 2018-11-08 10:48 /user/hive/warehouse/ruoze_d5.db/spark_people/age=__HIVE_DEFAULT_PARTITION__

hive 操作 MySQL

读MySQL


scala> val mysqlDF = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306").option("dbtable", "sqoop.emp").option("user", "root").option("password", "123456").option("driver", "com.mysql.jdbc.Driver").load()
mysqlDF: org.apache.spark.sql.DataFrame = [empno: decimal(4,0), ename: string ... 6 more fields]

scala> mysqlDF.show
+-----+--------+---------+----+--------------------+--------+--------+------+
|empno|   ename|      job| mgr|            hiredate|     sal|    comm|deptno|
+-----+--------+---------+----+--------------------+--------+--------+------+
| 1111|     lxp|       bb|8888|2018-12-12 00:00:...|16000.00|30000.00|    40|
| 7369|   SMITH|    CLERK|7902|1980-12-17 00:00:...|  800.00|    null|    20|
| 7499|   ALLEN| SALESMAN|7698|1981-02-20 00:00:...| 1600.00|  300.00|    30|
| 7521|    WARD| SALESMAN|7698|1981-02-22 00:00:...| 1250.00|  500.00|    30|
| 7566|   JONES|  MANAGER|7839|1981-04-02 00:00:...| 2975.00|    null|    20|
| 7654|  MARTIN| SALESMAN|7698|1981-09-28 00:00:...| 1250.00| 1400.00|    30|
| 7698|   BLAKE|  MANAGER|7839|1981-05-01 00:00:...| 2850.00|    null|    30|
| 7782|   CLARK|  MANAGER|7839|1981-06-09 00:00:...| 2450.00|    null|    10|
| 7788|   SCOTT|  ANALYST|7566|1982-12-09 00:00:...| 3000.00|    null|    20|
| 7839|    KING|PRESIDENT|null|1981-11-17 00:00:...| 5000.00|    null|    10|
| 7844|  TURNER| SALESMAN|7698|1981-09-08 00:00:...| 1500.00|    0.00|    30|
| 7876|   ADAMS|    CLERK|7788|1983-01-12 00:00:...| 1100.00|    null|    20|
| 7900|   JAMES|    CLERK|7698|1981-12-03 00:00:...|  950.00|    null|    30|
| 7902|    FORD|  ANALYST|7566|1981-12-03 00:00:...| 3000.00|    null|    20|
| 7934|  MILLER|    CLERK|7782|1982-01-23 00:00:...| 1300.00|    null|    10|
| 8888|xiaoming|     null|null|                null|    null|    null|  null|
| 9999|     lxp|       bb|9999|2018-12-12 00:00:...|16000.00|30000.00|    40|
+-----+--------+---------+----+--------------------+--------+--------+------+

写入MySQL

scala>  val mysqlDF = peopleDF.write.format("jdbc").option("url", "jdbc:mysql://localhost:3306").option("dbtable", "sqoop.spark_people").option("user", "root").option("password", "123456").option("driver", "com.mysql.jdbc.Driver").save()
mysqlDF: Unit = ()

mysql> select * from spark_people;
+------+---------+
| age  | name    |
+------+---------+
| NULL | Michael |
|   30 | Andy    |
|   19 | Justin  |
+------+---------+
3 rows in set (0.00 sec)