方式一(只有一列值的转换)
# python3
# -*- coding:utf-8 -*-
# @Time: 3/26/20 7:14 PM
# @Author: Damon
# @Software: PyCharm
import logging
from pyspark.sql import *
from pyspark.sql import functions as F
def score_df_processing(rdd, spark):
    """Rank single-column score values and attach a ``slots`` column.

    Every element of ``rdd`` becomes one row of a DataFrame whose only
    column is ``score``.  Rows are then numbered in ascending ``score``
    order (``row_number``) and ``slots`` is computed as
    ``row_number * 100 / total_row_count``.  The intermediate DataFrames
    are displayed on stdout along the way.

    :param rdd: RDD of bare score values, one value per element.
    :param spark: Not referenced in the body; kept so existing callers
        keep working.  NOTE(review): presumably passed so a SparkSession
        is known to exist before ``toDF()`` is called -- confirm.
    :return: RDD of rows carrying ``score``, ``row_number`` and ``slots``,
        or ``None`` when the input RDD is empty.
    """
    print(type(rdd))

    # Guard clause: nothing to rank when the input is empty.
    if rdd.isEmpty():
        logging.info(' error: input score data empty...')
        return None

    row = Row("score")  # Row factory whose single field is the column name
    df = rdd.map(row).toDF()  # convert the RDD into a DataFrame
    # show() prints the table itself and returns None, so it must not be
    # wrapped in print() (that would emit an extra "None" line).
    df.show()  # first output

    # Number the rows in ascending order of "score".  The window has no
    # partitionBy, so Spark evaluates it over the whole dataset at once.
    df = df.withColumn('row_number', F.row_number().over(Window.orderBy("score")))
    df.show()  # second output

    row_nums = df.count()
    df = df.withColumn('slots', df.row_number * 100 / row_nums)
    df.show(10)  # third output (first 10 rows only)
    return df.rdd
spark = SparkSession.builder.appName('rdd_df').config(&#