[root@centos00 ~]$ cd /opt/cdh5.14.2/hadoop-2.6.0-cdh5.14.2/
[root@centos00 hadoop-2.6.0-cdh5.14.2]$ sbin/hadoop-daemon.sh start namenode
[root@centos00 hadoop-2.6.0-cdh5.14.2]$ sbin/hadoop-daemon.sh start datanode
[root@centos00 ~]$ cd /opt/cdh5.14.2/hive-1.1.0-cdh5.14.2/
[root@centos00 hive-1.1.0-cdh5.14.2]$ bin/hive --service metastore &
[root@centos00 hadoop-2.6.0-cdh5.14.2]$ cd ../spark-2.2.1-cdh5.14.2/
[root@centos00 spark-2.2.1-cdh5.14.2]$ sbin/start-master.sh
[root@centos00 spark-2.2.1-cdh5.14.2]$ sbin/start-slaves.sh
[root@centos00 spark-2.2.1-cdh5.14.2]$ bin/spark-shell --master local[2]
scala> val df = spark.createDataFrame(Seq(
("Tom", 20, 15552211521L),
("Jack", 19, 13287994007L),
("Tony", 21, 15552211523L),
("Tom", 20, 15552211521L),
("David", 22, 15552211523L),
("Alex", 25, 15552211523L)
)).toDF("name", "age", "phone")
df: org.apache.spark.sql.DataFrame = [name: string, age: int ... 1 more field]
scala> df.show()
+-----+---+-----------+
| name|age| phone|
+-----+---+-----------+
| Tom| 20|15552211521|
| Jack| 19|13287994007|
| Tony| 21|15552211523|
| Tom| 20|15552211521|
|David| 22|15552211523|
| Alex| 25|15552211523|
+-----+---+-----------+
scala> import spark.implicits._
import spark.implicits._
scala> case class Person(name: String, age: Int, phone: Long)
defined class Person
scala> val person = df.as[Person]
person: org.apache.spark.sql.Dataset[Person] = [name: string, age: int ... 1 more field]
scala> person.show
+-----+---+-----------+
| name|age| phone|
+-----+---+-----------+
| Tom| 20|15552211521|
| Jack| 19|13287994007|
| Tony| 21|15552211523|
| Tom| 20|15552211521|
|David| 22|15552211523|
| Alex| 25|15552211523|
+-----+---+-----------+
// Deduplicate: drop rows where ALL columns are identical
scala> person.distinct().show()
+-----+---+-----------+
| name|age| phone|
+-----+---+-----------+
|David| 22|15552211523|
| Alex| 25|15552211523|
| Tom| 20|15552211521|
| Tony| 21|15552211523|
| Jack| 19|13287994007|
+-----+---+-----------+
// Deduplicate by a single column: keep one row per distinct "phone" value
scala> person.dropDuplicates("phone").show()
+----+---+-----------+
|name|age| phone|
+----+---+-----------+
|Jack| 19|13287994007|
| Tom| 20|15552211521|
|Tony| 21|15552211523|
+----+---+-----------+