闲来无事,在 Stack Overflow 上看代码玩,偶然发现一个之前没有用过的函数——foldLeft。现做记录如下:
// Sample rows whose string values contain stray whitespace.
// NOTE(review): assumes spark.implicits._ (for toDF) and
// org.apache.spark.sql.functions.{col, regexp_replace} are already in scope — confirm.
val sourceDF = Seq(
  (" p a b l o", "Paraguay"),
  ("Neymar", "B r asil")
).toDF("name", "country")

// Fold over the column names with sourceDF as the initial accumulator: each step
// overwrites one column with a copy in which every run of whitespace is removed.
val actualDF = Seq("name", "country").foldLeft(sourceDF) { (df, c) =>
  df.withColumn(c, regexp_replace(col(c), "\\s+", ""))
}

actualDF.show()
+------+--------+
|  name| country|
+------+--------+
| pablo|Paraguay|
|Neymar|  Brasil|
+------+--------+
// A one-row DataFrame whose column names contain spaces and mixed case.
val sourceDF = Seq(("funny", "joke")).toDF("A b C", "de F")

sourceDF.show()
+-----+----+
|A b C|de F|
+-----+----+
|funny|joke|
+-----+----+
// Rename every column to its lower-cased form with spaces replaced by underscores.
// The fold starts from sourceDF and renames one column per step, so the final
// accumulator carries all of the renames.
val actualDF = sourceDF.columns.foldLeft(sourceDF) { (memoDF, colName) =>
  // Locale.ROOT keeps the lower-casing independent of the JVM's default locale
  // (under a Turkish locale, for example, "I" would otherwise become a dotless "ı").
  val renamed = colName.toLowerCase(java.util.Locale.ROOT).replace(" ", "_")
  memoDF.withColumnRenamed(colName, renamed)
}

actualDF.show()
+-----+----+
|a_b_c|de_f|
+-----+----+
|funny|joke|
+-----+----+
import org.apache.spark.sql.DataFrame
/**
 * Renames every column of `df` by passing its name through `toSnakeCase`.
 *
 * @param df the DataFrame whose columns are renamed
 * @return the DataFrame produced by folding `withColumnRenamed` over all of `df`'s columns
 */
def snakeCaseColumns(df: DataFrame): DataFrame =
  df.columns.foldLeft(df) { (acc, name) =>
    val renamed = toSnakeCase(name)
    acc.withColumnRenamed(name, renamed)
  }
/**
 * Lower-cases `str` and replaces each space character with an underscore.
 *
 * Lower-casing uses `Locale.ROOT` so the result does not depend on the JVM's
 * default locale (under a Turkish locale, for example, "I" would otherwise
 * become a dotless "ı").
 *
 * @param str the name to convert
 * @return the lower-cased name with every " " replaced by "_"
 */
def toSnakeCase(str: String): String =
  str.toLowerCase(java.util.Locale.ROOT).replace(" ", "_")
// A one-row DataFrame whose column names contain spaces and mixed case.
val sourceDF = Seq(("funny", "joke")).toDF("A b C", "de F")

// transform applies snakeCaseColumns to sourceDF and yields the resulting DataFrame.
val actualDF = sourceDF.transform(snakeCaseColumns)

actualDF.show()
+-----+----+
|a_b_c|de_f|
+-----+----+
|funny|joke|
+-----+----+
参考资料如下:
1、How can I concat several float columns into one ArrayType(FloatType()) in spark DataFrame?
2、Performing operations on multiple columns in a Spark DataFrame with foldLeft