import pyspark.sql.functions as fn

# `df` is expected to exist in the surrounding session with columns
# s_id, req_date, class_status, finish_type (assumed from usage — confirm upstream).


def ff(cond):
    """Return a distinct count of ``s_id`` over rows matching *cond*.

    Rows failing *cond* map to NULL — ``when()`` without ``otherwise`` already
    defaults to NULL, and ``countDistinct`` ignores NULLs — so the explicit
    ``.otherwise(None)`` was redundant and is dropped.
    """
    return fn.countDistinct(fn.when(cond, df['s_id']))


# Classes that FINISHED exactly as scheduled.
cond = (df['class_status'] == 'FINISHED') & (df['finish_type'] == 'AS_SCHEDULED')

# Alias the aggregate so the output column is readable instead of the
# auto-generated "count(DISTINCT CASE WHEN ... END)" header.
(
    df.groupby('req_date')
      .agg(ff(cond).alias('finished_as_scheduled_students'))
      .orderBy('req_date')
      .show()
)
# Sample output (column header is the auto-generated aggregate expression):
# +----------+--------------------------------------------------------------------------------------------------------------+
# | req_date|count(DISTINCT CASE WHEN ((class_status = FINISHED) AND (finish_type = AS_SCHEDULED)) THEN s_id ELSE NULL END)|
# +----------+--------------------------------------------------------------------------------------------------------------+
# |2019-08-02| 5489|
# |2019-08-03| 5545|
# |2019-08-04| 4822|
# |2019-08-05| 3774|
# |2019-08-06| 4798|
# |2019-08-07| 4562|
# |2019-08-08| 4609|
# |2019-08-09| 4646|
# |2019-08-10| 4290|
# |2019-08-11| 3554|
# |2019-08-12| 649|
# |2019-08-13| 4142|
# |2019-08-14| 4138|
# |2019-08-15| 1930|
# +----------+--------------------------------------------------------------------------------------------------------------+