from pyspark.ml.regression import AFTSurvivalRegression
from pyspark.ml.linalg import Vectors
from pyspark.sql import SparkSession
spark = SparkSession \
    .builder \
    .appName("dataFrame") \
    .getOrCreate()
# Training data as (label, censor, features). censor == 1.0 means the event
# occurred (uncensored); censor == 0.0 means the observation is censored.
training = spark.createDataFrame([
    (1.218, 1.0, Vectors.dense(1.560, -0.605)),
    (2.949, 0.0, Vectors.dense(0.346, 2.158)),
    (3.627, 0.0, Vectors.dense(1.380, 0.231)),
    (0.273, 1.0, Vectors.dense(0.520, 1.151)),
    (4.199, 0.0, Vectors.dense(0.795, -0.226))], ["label", "censor", "features"])
# Request the 0.3 and 0.6 quantiles of the predicted survival time,
# written to an output column named "quantiles".
quantileProbabilities = [0.3, 0.6]
aft = AFTSurvivalRegression(quantileProbabilities=quantileProbabilities,
                            quantilesCol="quantiles")
model = aft.fit(training)
# Print the coefficients, intercept and scale parameter for AFT survival regression
print("Coefficients: " + str(model.coefficients))
print("Intercept: " + str(model.intercept))
print("Scale: " + str(model.scale))
model.transform(training).show(truncate=False)
Coefficients: [-0.4963044110531165,0.19845217252922842]
Intercept: 2.638089896305634
+-----+------+--------------+-----------------+---------------------------------------+
|label|censor|features      |prediction       |quantiles                              |
+-----+------+--------------+-----------------+---------------------------------------+
|1.218|1.0   |[1.56,-0.605] |5.718985621018952|[1.1603229908059516,4.995460583406753] |
|2.949|0.0   |[0.346,2.158] |18.07678210850554|[3.6675919944963185,15.789837303662035]|
|3.627|0.0   |[1.38,0.231]  |7.381908879359964|[1.4977129086101577,6.448002719505493] |
|0.273|1.0   |[0.52,1.151]  |13.57771781488451|[2.754778414791513,11.859962351993202] |
|4.199|0.0   |[0.795,-0.226]|9.013087597344812|[1.828662187733188,7.8728164067854856] |
+-----+------+--------------+-----------------+---------------------------------------+
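The fitted model can also be queried for a single observation rather than a whole DataFrame. The following is a minimal sketch, assuming Spark 3.0 or later, where AFTSurvivalRegressionModel exposes predict and predictQuantiles on a plain feature vector; the feature values used here are made up purely for illustration.

# Sketch only: score one hypothetical feature vector directly
# (assumes the Spark 3.0+ single-vector model API).
new_features = Vectors.dense(1.0, 0.5)       # illustrative values, not from the training data
print(model.predict(new_features))           # expected survival time for this vector
print(model.predictQuantiles(new_features))  # survival-time quantiles at 0.3 and 0.6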