from pyspark.sql import SparkSession
# Build (or fetch the already-running) SparkSession used by the rest of the script.
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)
# Column names for the employee rows, in the same order as the tuple fields.
empColumns = [
    "emp_id",
    "name",
    "superior_emp_id",
    "year_joined",
    "emp_dept_id",
    "gender",
    "salary",
]

# Sample employee rows. Note that year_joined and emp_dept_id are strings,
# while emp_id, superior_emp_id and salary are integers.
emp = [
    (1, "Smith", -1, "2018", "10", "M", 3000),
    (2, "Rose", 1, "2010", "20", "M", 4000),
    (3, "Williams", 1, "2010", "10", "M", 1000),
    (4, "Jones", 2, "2005", "10", "F", 2000),
    (5, "Brown", 2, "2010", "40", "", -1),
    (6, "Brown", 2, "2010", "50", "", -1),
]
# Turn the employee rows into a DataFrame, then print its inferred schema
# and its full (untruncated) contents.
empDF = spark.createDataFrame(emp, schema=empColumns)
empDF.printSchema()
empDF.show(truncate=False)
# Column names for the department rows, in the same order as the tuple fields.
deptColumns = ["dept_name", "dept_id"]

# Sample department rows; dept_id is an integer here.
dept = [
    ("Finance", 10),
    ("Marketing", 20),
    ("Sales", 30),
    ("IT", 40),
]
# Turn the department rows into a DataFrame, then print its inferred schema
# and its full (untruncated) contents.
deptDF = spark.createDataFrame(dept, schema=deptColumns)
deptDF.printSchema()
deptDF.show(truncate=False)
# Expose both DataFrames to Spark SQL under the view names "emp" and "dept".
empDF.createOrReplaceTempView("emp")
deptDF.createOrReplaceTempView("dept")

# Inner-join employees to their departments and print the result.
# emp.emp_dept_id is a string column while dept.dept_id is a long, so the
# key is cast explicitly instead of relying on implicit coercion in the
# comparison. The join condition lives in an explicit INNER JOIN ... ON
# clause rather than a comma join filtered in WHERE, so the intent (and the
# absence of an accidental cross join) is visible in the query itself.
# Employees whose department id has no match in "dept" are not returned.
spark.sql(
    "SELECT * "
    "FROM emp "
    "INNER JOIN dept "
    "ON CAST(emp.emp_dept_id AS BIGINT) = dept.dept_id"
).show()
root
|-- emp_id: long (nullable = true)
|-- name: string (nullable = true)
|-- superior_emp_id: long (nullable = true)
|-- year_joined: string (nullable = true)
|-- emp_dept_id: string (nullable = true)
|-- gender: string (nullable = true)
|-- salary: long (nullable = true)
+------+--------+---------------+-----------+-----------+------+------+
|emp_id|name    |superior_emp_id|year_joined|emp_dept_id|gender|salary|
+------+--------+---------------+-----------+-----------+------+------+
|1     |Smith   |-1             |2018       |10         |M     |3000  |
|2     |Rose    |1              |2010       |20         |M     |4000  |
|3     |Williams|1              |2010       |10         |M     |1000  |
|4     |Jones   |2              |2005       |10         |F     |2000  |
|5     |Brown   |2              |2010       |40         |      |-1    |
|6     |Brown   |2              |2010       |50         |      |-1    |
+------+--------+---------------+-----------+-----------+------+------+
root
|-- dept_name: string (nullable = true)
|-- dept_id: long (nullable = true)
+---------+-------+
|dept_name|dept_id|
+---------+-------+
|Finance  |10     |
|Marketing|20     |
|Sales    |30     |
|IT       |40     |
+---------+-------+
+------+--------+---------------+-----------+-----------+------+------+---------+-------+
|emp_id|    name|superior_emp_id|year_joined|emp_dept_id|gender|salary|dept_name|dept_id|
+------+--------+---------------+-----------+-----------+------+------+---------+-------+
|     1|   Smith|             -1|       2018|         10|     M|  3000|  Finance|     10|
|     3|Williams|              1|       2010|         10|     M|  1000|  Finance|     10|
|     4|   Jones|              2|       2005|         10|     F|  2000|  Finance|     10|
|     2|    Rose|              1|       2010|         20|     M|  4000|Marketing|     20|
|     5|   Brown|              2|       2010|         40|      |    -1|       IT|     40|
+------+--------+---------------+-----------+-----------+------+------+---------+-------+
Process finished with exit code 0