- StringIndexer 可以把字符串类型的列按照标签出现频率从高到低排序，并生成从 0 开始的数值索引（默认情况下出现频率最高的标签索引为 0）
作者:森栏
链接:https://www.imooc.com/article/41692
来源:慕课网
/**
 * Demonstrates Spark ML's `StringIndexer` on a small in-memory DataFrame.
 *
 * Builds a six-row DataFrame with the columns `id` and `category`, fits a
 * `StringIndexer` on `category`, and prints the transformed DataFrame, which
 * gains a `categoryIndex` column. With the indexer's default ordering
 * (descending label frequency) the most frequent label receives index 0.
 *
 * The Spark session is stopped before returning, even if fitting,
 * transforming or printing fails, so the local Spark context is not leaked.
 */
def StringIndexer(): Unit = {
  // Imported locally; `new StringIndexer()` below names this Spark ML class
  // (the enclosing method merely shares the same name).
  import org.apache.spark.ml.feature.StringIndexer

  val spark: SparkSession = SparkSession.builder()
    .appName("implicits")
    .master("local[2]")
    .getOrCreate()

  try {
    val df = spark.createDataFrame(
      Seq((0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c"))
    ).toDF("id", "category")

    val indexer = new StringIndexer()
      .setInputCol("category")
      .setOutputCol("categoryIndex")

    // fit() learns the label -> index mapping; transform() appends the index column.
    val indexed = indexer.fit(df).transform(df)
    indexed.show()
  } finally {
    // Release the local Spark context and its worker threads.
    // NOTE(review): getOrCreate() may hand back a session that is already
    // active in this JVM; stop() shuts that shared session down as well.
    spark.stop()
  }
}
运行结果