脚本如下:
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
@author:
@contact:
@time:
@context:按照dataframe某一列的数值排序,并增加一列索引(1)升序排
"""
from __future__ import print_function
from pyspark.sql import SparkSession
import os, time,sys
reload(sys)
sys.setdefaultencoding("utf-8")
from pyspark.sql import Row
from pyspark.sql import functions as F
from pyspark.sql.window import Window, WindowSpec
os.environ['SPARK_HOME'] = "E:/data_page/spark-2.0.2-bin-hadoop2.7"
spark = SparkSession.builder.appName("indexOrder").getOrCreate()
sc = spark.sparkContext
Df1 = sc.parallelize(range(0, 11)).map(lambda x: Row(Rank=x)).toDF()
Df1.show()
df2 = Df1.withColumn("zero", F.abs(Df1.Rank) * 0)
df2.show()
df3 = df2.select("Rank",F.row_number().over(Window.partitionBy("zero").orderBy("Rank")).alias("rownumber"))
df3.show()
time.sleep(1)
spark.stop()
结果如下:
+----+
|Rank|
+----+
|   0|
|   1|
|   2|
|   3|
|   4|
|   5|
|   6|
|   7|
|   8|
|   9|
|  10|
+----+
+----+----+
|Rank|zero|
+----+----+
|   0|   0|
|   1|   0|
|   2|   0|
|   3|   0|
|   4|   0|
|   5|   0|
|   6|   0|
|   7|   0|
|   8|   0|
|   9|   0|
|  10|   0|
+----+----+
+----+---------+
|Rank|rownumber|
+----+---------+
|   0|        1|
|   1|        2|
|   2|        3|
|   3|        4|
|   4|        5|
|   5|        6|
|   6|        7|
|   7|        8|
|   8|        9|
|   9|       10|
|  10|       11|
+----+---------+