Basic Statistics Correlation from pyspark.ml.linalg import Vectors from pyspark.ml.stat import Correlation data = [(Vectors.sparse(4, [(0, 1.0), (3, -2.0)]),), (Vectors.dense([4.0, 5.0, 0.0, 3.0]),), (Vectors.dense([6.0, 7.0, 0.0, 8.0]),), (Vectors.sparse(4, [(0, 9.0), (3, 1.0)]),)] df