from pyspark.sql.functions import udf, col, explode, from_json
from pyspark.sql.types import LongType, StructType, StructField, FloatType, IntegerType, StringType, DoubleType, BooleanType, ArrayType, DecimalType
from pyspark.sql import functions as F, window as W, Row
from pyspark.sql import SparkSession
from pyspark.sql.window import Window
# Window partitioned by 'name'. Since every row below gets the constant
# name=1, this effectively spans the whole DataFrame as a single partition.
wind = Window.partitionBy('name')

# NOTE: despite the name `med`, percentile_approx with an array argument
# returns an ARRAY of approximate quantiles — here (p25, p50, p75, p95) —
# not just the median. `med_val` below will therefore be an array column.
med = F.expr('percentile_approx(len, array(0.25, 0.5, 0.75, 0.95))')

# Read the raw, semicolon-delimited text output for the current date and
# derive a per-row `len` (5th field, cast to int) plus the window quantiles.
# NOTE(review): `spark` and `cur_date` are assumed to be defined earlier in
# this file/session — confirm they are in scope here.
# Bug fix: the original used bare `lit(1)`, but `lit` is not imported at the
# top of the file — use the `F` alias (pyspark.sql.functions) instead.
seq_df = spark.read.text("/user/data/my_name/rec/seq_outputs/{}".format(cur_date))\
.withColumn('name', F.lit(1))\
.withColumn('len', F.split(col('value'), ';')[4])\
.withColumn('len', col('len').cast(IntegerType()))\
.withColumn('med_val', med.over(wind))
seq_df.show()