from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession
import json
def getSqlAndSpark():
    """
    Create and return the Spark objects used by this script.

    Note: despite the name, no SQL-specific object is created here — the
    original author noted the SQL part was not written yet. The console
    progress bar is disabled to keep output clean.

    :return: tuple of (SparkContext, SparkSession)
    """
    spark_conf = SparkConf().setAppName('app').set('spark.ui.showConsoleProgress', 'false')
    # getOrCreate avoids "SparkContext already exists" errors on re-runs.
    sc = SparkContext.getOrCreate(conf=spark_conf)
    spark = SparkSession(sc)
    return sc, spark
def read_json(sc, spark):
    """
    Build a small JSON document in memory, load it into a Spark DataFrame
    via an RDD of JSON strings, and display it.

    :param sc: SparkContext used to parallelize the JSON string.
    :param spark: SparkSession whose reader parses the RDD as JSON.
    """
    # Alternative: read from a file instead of an in-memory document.
    # data = spark.read.json('/data2/data.json')
    json_data = {
        'name': 'zhangsan',
        'age': 14,
        'sex': 'man'
    }
    # spark.read.json on an RDD expects JSON *strings*, one document per
    # element. The original json.loads(json.dumps(...)) round-trip was an
    # identity operation yielding a dict, which the string-based JSON
    # reader cannot parse reliably.
    json_rdd = sc.parallelize([json.dumps(json_data)])
    data = spark.read.json(json_rdd)
    # DataFrame.show() prints the rows itself and returns None; wrapping it
    # in print() emitted a spurious "None" line.
    data.show()
def main():
    """Entry point: create the Spark objects and run the JSON-reading demo."""
    context, session = getSqlAndSpark()
    read_json(context, session)


if __name__ == '__main__':
    main()
# Source note: adapted from a blog post, "Reading JSON data with pyspark in
# Python", published 2022-11-07 09:20:04.