使用 pyspark 操作数据库：1. sqlite 篇
from pyspark.sql import SparkSession
if __name__ == '__main__':
    # Load the `links` and `movies` tables from a local SQLite database over
    # JDBC, join them on `movieId` with Spark SQL, and print the result.
    spark = SparkSession.builder.appName("sqlite").getOrCreate()
    try:
        # NOTE(review): `rewriteBatchedStatements` is a MySQL Connector/J
        # option; it looks like a leftover here -- confirm the SQLite driver
        # tolerates it before relying on this URL elsewhere.
        url = "jdbc:sqlite:D:/work/workspace/python_executor_db/data/dbs/movies.db?rewriteBatchedStatements=true"
        properties = {"driver": "org.sqlite.JDBC"}

        # The `table` argument is a parenthesised subquery with an alias.
        links_frame = spark.read.jdbc(
            url,
            "(select * from links)a",
            properties=properties,
        )
        movies_frame = spark.read.jdbc(
            url,
            "(select * from movies)b",
            properties=properties,
        )

        # `registerTempTable` is deprecated since Spark 2.0;
        # `createOrReplaceTempView` is its direct replacement.
        links_frame.createOrReplaceTempView("links")
        movies_frame.createOrReplaceTempView("movies")

        # The query text is kept verbatim (hence the unindented lines inside
        # the literal).
        r = spark.sql("""
select
a.*, b.*
from
links a,
movies b
where a.movieId = b.movieId
""")
        r.show()
    finally:
        # Always release the session, even if a read or the query fails.
        spark.stop()