# Print the column names, types and nullability of test_df before the vector column is flattened.
test_df.printSchema()
root
|-- features: vector (nullable = true)
|-- output: double (nullable = true)
|-- prediction: double (nullable = false)
def _vector_to_list(vector):
    """Convert an ML vector into a plain Python list, passing nulls through.

    Returns None when the input is None so that rows with a null vector
    produce a null array instead of raising inside the UDF.
    """
    # The 'features' column is nullable, so a row may hand the UDF None.
    if vector is None:
        return None
    # toArray() gives a numpy array; tolist() converts it to native Python
    # floats rather than numpy scalars.
    return vector.toArray().tolist()


# NOTE(review): the element type is FloatType, so values are presumably narrowed
# to 32-bit floats on the way out; switch to DoubleType if full precision matters.
vector_udf = F.udf(_vector_to_list, ArrayType(FloatType()))

# Add 'col1', an array copy of the 'features' vector, and show the new schema.
flattened_df = test_df.withColumn('col1', vector_udf('features'))
flattened_df.printSchema()
root
|-- features: vector (nullable = true)
|-- output: double (nullable = true)
|-- prediction: double (nullable = false)
|-- col1: array (nullable = true)
| |-- element: float (containsNull = true)
# Columns that are JSON-encoded into strings before export
# (presumably because the CSV writer cannot store array columns - confirm).
list4tojson = ['col1']
_json_columns = [F.to_json(column_name) for column_name in list4tojson]

# Save file: export 'output' plus the JSON-encoded columns as CSV.
# NOTE(review): 'prediction' is not part of this export, unlike save_df below.
_csv_frame = flattened_df.select('output', *_json_columns)
_csv_frame.coalesce(1).write.csv('test_001.csv')

# Same projection with 'prediction' included; print its schema for inspection.
save_df = flattened_df.select('output', "prediction", *_json_columns)
save_df.printSchema()
root
|-- output: double (nullable = true)
|-- prediction: double (nullable = false)
|-- to_json(col1): string (nullable = true)