# Example: operating a MySQL database from Python with pymysql
import pymysql

# Basic CRUD against MySQL with pymysql.  Connection parameters are
# hard-coded for the teaching environment.
db_connection = pymysql.connect(
    host='hadoop11',
    port=3306,
    user='root',
    password='123456',
    database='test1',
)
try:
    # The cursor is closed automatically when the `with` block exits.
    with db_connection.cursor() as db_cursor:
        # INSERT / DELETE / UPDATE examples — always parameterized (%s
        # placeholders) to avoid SQL injection, and followed by commit():
        # db_cursor.execute(
        #     'insert into stu(stu_id,stu_name,age,gender) values (%s, %s, %s, %s)',
        #     (2, 'xiaoMing', 34, '男'))
        # db_cursor.execute('delete from stu where stu_id = %s', (4,))
        # db_cursor.execute(
        #     'update stu set stu_name = %s,age=%s where stu_id = %s',
        #     ('ww', 22, 3))
        # db_connection.commit()  # DML is not visible until committed

        # SELECT: fetchone() returns one row, fetchmany(n) returns up to n,
        # fetchall() returns every remaining row as a tuple of tuples.
        sql = 'select * from stu'
        db_cursor.execute(sql)
        result = db_cursor.fetchall()
        print(result)
finally:
    # Always release the connection, even when a query raises.
    db_connection.close()
# Example: operating a MySQL database from Spark via the JDBC data source
from pyspark.sql import SparkSession

if __name__ == '__main__':
    # Read a MySQL table through Spark's JDBC data source and pull one
    # column back to the driver as a Python list.
    spark = (SparkSession.builder
             .config('hive.metastore.uris', 'thrift://hadoop11:9083')
             .config('spark.sql.warehouse.dir',
                     'hdfs://hadoop11:8020/user/hive/warehouse')
             .appName('spark操作mysql数据库')
             .enableHiveSupport()
             .getOrCreate())
    try:
        # JDBC connection properties; the driver class must be on the
        # Spark classpath (mysql-connector jar).
        props = {'user': 'root', 'password': '123456',
                 'driver': 'com.mysql.jdbc.Driver'}
        # Load the whole `funnel_name` table into a DataFrame.
        dataFrame = spark.read.jdbc(
            url='jdbc:mysql://hadoop11:3306/test1',
            table='funnel_name',
            properties=props)
        # Register a temporary view so it can be queried with plain SQL.
        dataFrame.createTempView('funnel_name')
        result_df = spark.sql('select id from funnel_name')
        # collect() brings every row to the driver — fine for small tables.
        id_list = [row['id'] for row in result_df.collect()]
        print(id_list)
    finally:
        # Release the SparkSession (the original script leaked it).
        spark.stop()
# Example: Spark reads a local file, writes it into a MySQL table, then reads the table back
from pyspark.sql import SparkSession

if __name__ == '__main__':
    # Read a local JSON file into a DataFrame, write it to a MySQL table
    # over JDBC, then read the table back and display it.
    spark = (SparkSession.builder
             .master('local[*]')
             .config('spark.driver.host', 'localhost')
             .appName('测试')
             .getOrCreate())
    try:
        # Load the local JSON file (one JSON object per line by default).
        df = spark.read.json(r'E:\bigdata\code\spark\code\sql\data\a.json')
        props = {'user': 'root', 'password': '123456',
                 'driver': 'com.mysql.jdbc.Driver'}
        # Write into the `df_user` table.  NOTE: the default save mode is
        # 'error' — the job fails if the table already exists; pass
        # mode='append' or mode='overwrite' to change that.
        df.write.jdbc(url='jdbc:mysql://hadoop11:3306/test1',
                      table='df_user', properties=props)
        # Read the table back to verify the round trip.
        df1 = spark.read.jdbc(url='jdbc:mysql://hadoop11:3306/test1',
                              table='df_user', properties=props)
        df1.show()
    finally:
        # Always stop the session, even if the JDBC write fails.
        spark.stop()