Spark业务开发-列选择
输入数据
"id","name","description","weight"
"102","car battery","12V car battery","8.1"
"103","12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3","0.8"
"104","hammer","12oz carpenter's hammer","0.75"
"105","hammer","14oz carpenter's hammer","0.875"
"106","hammer","16oz carpenter's hammer","1"
"107","rocks","box of assorted rocks","5.3"
"108","jacket","water resistent black wind breaker","0.1"
"109","spare tire","24 inch spare tire","22.2"
"101","scooter","Small 2-wheel scooter","3.14"
"102","scooter1","Small 2-wheel scooter1","3.14"
输出数据
+---+------+
| id|weight|
+---+------+
|102|   8.1|
|103|   0.8|
|104|  0.75|
|105| 0.875|
|106|   1.0|
|107|   5.3|
|108|   0.1|
|109|  22.2|
|101|  3.14|
|102|  3.14|
+---+------+
程序代码
package com. cch. bigdata. spark. process. cols
import com. cch. bigdata. spark. process. AbstractTransform
import org. apache. spark. sql. DataFrame
import org. apache. spark. sql. functions. col
/**
 * Column-selection transform: loads the products CSV and prints only the
 * configured subset of its columns.
 *
 * NOTE(review): `loadCsv` and `spark` are not defined in this class; they are
 * presumably inherited from [[AbstractTransform]] -- confirm against that class.
 */
class ColumnChooser extends AbstractTransform {

  // Names of the columns retained in the projection, in output order.
  private val columns = Array[String]("id", "weight")

  /**
   * Loads the CSV, projects it onto `columns` and prints the result via `show()`.
   *
   * @throws RuntimeException if no columns are configured; raised before the
   *                          CSV is loaded.
   */
  override def process(): Unit = {
    // Guard first so that nothing is loaded when there is nothing to select.
    if (columns.length == 0) throw new RuntimeException("列未选择!")

    val source: DataFrame = loadCsv("src/main/resources/csv/products.csv", spark)
    // Turn each configured name into a Column and pass them as varargs.
    val projection = columns.map(col(_))
    source.select(projection: _*).show()
  }

  /** Returns the application name for this transform ("列选择", i.e. "column selection"). */
  override def getAppName(): String = "列选择"
}
/** Companion object providing the command-line entry point for [[ColumnChooser]]. */
object ColumnChooser {

  /**
   * Creates a [[ColumnChooser]] and runs its `process()` method.
   *
   * @param args command-line arguments; not read by this method
   */
  def main(args: Array[String]): Unit = {
    val chooser = new ColumnChooser()
    chooser.process()
  }
}
参数解释