Type of join to perform. Default inner. Must be one of: inner, cross, outer, full, full_outer, left, left_outer, right, right_outer, left_semi, left_anti.
Supported join types include:
inner
outer
full
fullouter
full_outer
leftouter
left
left_outer
rightouter
right
right_outer
leftsemi
left_semi
leftanti
left_anti
cross
Here is a simple illustrative experiment:
import org.apache.spark.sql._
/**
 * Small runnable experiment: joins two tiny DataFrames on the `id` column
 * once per join-type string in `joinTypes` and prints each result.
 *
 * An explicit `main` method is used instead of `extends App`, because the
 * Spark documentation warns that subclasses of `scala.App` may not work
 * correctly as Spark applications.
 */
object SparkSandbox {

  /**
   * Entry point. Builds a local session, prints both input tables, then
   * prints the joined table for every join type.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    implicit val spark: SparkSession =
      SparkSession.builder().master("local[*]").getOrCreate()
    import spark.implicits._

    // Restrict Spark's log output to ERROR level.
    spark.sparkContext.setLogLevel("ERROR")

    try {
      val left = Seq((1, "A1"), (2, "A2"), (3, "A3"), (4, "A4")).toDF("id", "value")
      val right =
        Seq((3, "A3"), (4, "A4"), (4, "A4_1"), (5, "A5"), (6, "A6")).toDF("id", "value")

      println("LEFT")
      left.orderBy("id").show()

      println("RIGHT")
      right.orderBy("id").show()

      val joinTypes = Seq(
        "inner",
        "outer", "full", "full_outer",
        "left", "left_outer",
        "right", "right_outer",
        "left_semi",
        "left_anti"
      )

      joinTypes.foreach { joinType =>
        println(s"${joinType.toUpperCase()} JOIN")
        // Sorting is by `id` only, so rows sharing an id have no guaranteed relative order.
        left
          .join(right = right, usingColumns = Seq("id"), joinType = joinType)
          .orderBy("id")
          .show()
      }
    } finally {
      // Release the session even if one of the joins throws.
      spark.stop()
    }
  }
}
Output (rows that share the same id may appear in either order, because the result is sorted by id only):
LEFT
+---+-----+
| id|value|
+---+-----+
| 1| A1|
| 2| A2|
| 3| A3|
| 4| A4|
+---+-----+
RIGHT
+---+-----+
| id|value|
+---+-----+
| 3| A3|
| 4| A4|
| 4| A4_1|
| 5| A5|
| 6| A6|
+---+-----+
INNER JOIN
+---+-----+-----+
| id|value|value|
+---+-----+-----+
| 3| A3| A3|
| 4| A4| A4_1|
| 4| A4| A4|
+---+-----+-----+
OUTER JOIN
+---+-----+-----+
| id|value|value|
+---+-----+-----+
| 1| A1| null|
| 2| A2| null|
| 3| A3| A3|
| 4| A4| A4|
| 4| A4| A4_1|
| 5| null| A5|
| 6| null| A6|
+---+-----+-----+
FULL JOIN
+---+-----+-----+
| id|value|value|
+---+-----+-----+
| 1| A1| null|
| 2| A2| null|
| 3| A3| A3|
| 4| A4| A4|
| 4| A4| A4_1|
| 5| null| A5|
| 6| null| A6|
+---+-----+-----+
FULL_OUTER JOIN
+---+-----+-----+
| id|value|value|
+---+-----+-----+
| 1| A1| null|
| 2| A2| null|
| 3| A3| A3|
| 4| A4| A4|
| 4| A4| A4_1|
| 5| null| A5|
| 6| null| A6|
+---+-----+-----+
LEFT JOIN
+---+-----+-----+
| id|value|value|
+---+-----+-----+
| 1| A1| null|
| 2| A2| null|
| 3| A3| A3|
| 4| A4| A4_1|
| 4| A4| A4|
+---+-----+-----+
LEFT_OUTER JOIN
+---+-----+-----+
| id|value|value|
+---+-----+-----+
| 1| A1| null|
| 2| A2| null|
| 3| A3| A3|
| 4| A4| A4_1|
| 4| A4| A4|
+---+-----+-----+
RIGHT JOIN
+---+-----+-----+
| id|value|value|
+---+-----+-----+
| 3| A3| A3|
| 4| A4| A4_1|
| 4| A4| A4|
| 5| null| A5|
| 6| null| A6|
+---+-----+-----+
RIGHT_OUTER JOIN
+---+-----+-----+
| id|value|value|
+---+-----+-----+
| 3| A3| A3|
| 4| A4| A4|
| 4| A4| A4_1|
| 5| null| A5|
| 6| null| A6|
+---+-----+-----+
LEFT_SEMI JOIN
+---+-----+
| id|value|
+---+-----+
| 3| A3|
| 4| A4|
+---+-----+
LEFT_ANTI JOIN
+---+-----+
| id|value|
+---+-----+
| 1| A1|
| 2| A2|
+---+-----+