from pyspark.ml.feature import VectorAssembler
A feature transformer that merges multiple columns into a vector column.
合并几个列到一个向量列
from pyspark.ml.linalg import Vectors
from pyspark.ml.feature import VectorAssembler
from pyspark.sql import SparkSession
import os
# PYSPARK_PYTHON names the Python executable PySpark should use; it is set
# here, at import time, so it is in place before any SparkSession is created.
os.environ.update(PYSPARK_PYTHON="/usr/local/bin/python3")
if __name__ == "__main__":
    # Example: use VectorAssembler to merge two scalar columns ("hour",
    # "mobile") and one existing vector column ("userFeatures") into a single
    # vector column named "features".
    spark = (
        SparkSession.builder
        .appName("VectorAssemblerExample")
        .getOrCreate()
    )
    try:
        # A one-row sample DataFrame; "userFeatures" is already a dense vector.
        dataset = spark.createDataFrame(
            [(0, 18, 1.0, Vectors.dense([0.0, 10.0, 0.5]), 1.0)],
            ["id", "hour", "mobile", "userFeatures", "clicked"],
        )

        assembler = VectorAssembler(
            inputCols=["hour", "mobile", "userFeatures"],
            outputCol="features",
        )

        # transform() returns a new DataFrame with the "features" column added.
        output = assembler.transform(dataset)
        print("Assembled columns 'hour', 'mobile', 'userFeatures' to vector column 'features'")
        output.select("features", "clicked").show(truncate=False)
    finally:
        # Always release the Spark session, even if a step above raised.
        spark.stop()
输出