Without further ado, here's the code:
# coding=utf-8
import sys
sys.path.append('/data/python_pip')

from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
import pyhdb  # SAP HANA client; imported here but not actually used in this example

spark = SparkSession \
    .builder \
    .appName("pyspark test") \
    .enableHiveSupport() \
    .getOrCreate()
sc = spark.sparkContext
print("SparkSession success!!!")
# schema = StructType([
#     StructField("id", IntegerType(), True),
#     StructField("name", StringType(), True),
#     StructField("remark", StringType(), True)
# ])
# # Create some sample data
# data = [
#     (4, "Alice", "30"),
#     (5, "Bob", "25"),
#     (6, "Charlie", "35")
# ]
# dataDF = spark.createDataFrame(data, schema)
# Read a Hive table (the table name here is a placeholder).
# Note: spark.sql() already returns a DataFrame; the collect()/parallelize()
# round trip below pulls the rows to the driver first and then rebuilds a
# DataFrame with an explicit DDL-style schema.
datas = spark.sql("select * from table limit 1").collect()
dataRDD = sc.parallelize(datas)
dataDF = spark.createDataFrame(dataRDD, "id:int, name:string, remark:string, ct:string")
dataDF.show()
print("dataDF success!!!")
# MySQL JDBC connection settings (host, database and credentials are placeholders)
jdbcUrl = "jdbc:mysql://host:3306/db"
jdbcProperties = {
    "user": "xxx",
    "password": "xxx",
    # For MySQL Connector/J 8.x the driver class is "com.mysql.cj.jdbc.Driver"
    "driver": "com.mysql.jdbc.Driver",
}
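# The MySQL JDBC driver jar must be available on the Spark classpath,
# e.g. passed to spark-submit via --jars or configured through spark.jars.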
dataDF.write.format("jdbc") \
    .option("url", jdbcUrl) \
    .option("dbtable", "test_dm_unit_emp2") \
    .option("user", jdbcProperties["user"]) \
    .option("password", jdbcProperties["password"]) \
    .option("driver", jdbcProperties["driver"]) \
    .mode("append") \
    .save()
print("data jdbc write success!!!")
df = spark.read.format("jdbc") \
    .option("url", jdbcUrl) \
    .option("dbtable", "table_name") \
    .option("user", jdbcProperties["user"]) \
    .option("password", jdbcProperties["password"]) \
    .option("driver", jdbcProperties["driver"]) \
    .load()
df.show()
print("data jdbc read success!!!")
# Stop the SparkSession (this also stops the underlying SparkContext)
spark.stop()
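For comparison, below is a minimal sketch of the same Hive-to-MySQL flow that writes the DataFrame returned by spark.sql() directly, using the write.jdbc()/read.jdbc() shorthand, so the rows never have to be collected to the driver. The Hive table name, JDBC URL, target table and credentials are placeholders, not values from the job above.

# coding=utf-8
# Minimal sketch: Hive -> MySQL without the collect()/parallelize() round trip.
from pyspark.sql import SparkSession

spark = SparkSession.builder \
    .appName("hive to mysql sketch") \
    .enableHiveSupport() \
    .getOrCreate()

# spark.sql() already returns a distributed DataFrame, so it can be written as-is.
hive_df = spark.sql("select id, name, remark, ct from some_hive_table limit 1")

jdbc_url = "jdbc:mysql://host:3306/db"
jdbc_props = {
    "user": "xxx",
    "password": "xxx",
    "driver": "com.mysql.jdbc.Driver",
}

# DataFrameWriter.jdbc() is a shorthand for format("jdbc") plus the individual options.
hive_df.write.jdbc(url=jdbc_url, table="target_table", mode="append", properties=jdbc_props)

# Read the target table back the same way.
spark.read.jdbc(url=jdbc_url, table="target_table", properties=jdbc_props).show()

spark.stop()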