第1关 数据清洗——过滤字段长度不足的记录，并将出生日期转换成指定格式
package com.yy
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{
DataFrame, Dataset, SparkSession}
object edu{
/**
 * One input record made of 23 fields, every one carried as a raw `String`.
 *
 * The declaration order below is the positional order used by `Person(...)`
 * construction and by pattern matching, so it must not be rearranged.
 *
 * NOTE(review): presumably each field maps to one column of the CSV file
 * loaded in `main`, in the same left-to-right order -- confirm against the data.
 */
case class Person(
  id: String,
  Name: String,
  CtfTp: String,
  CtfId: String,
  Gender: String,
  Birthday: String,
  Address: String,
  Zip: String,
  Duty: String,
  Mobile: String,
  Tel: String,
  Fax: String,
  EMail: String,
  Nation: String,
  Taste: String,
  Education: String,
  Company: String,
  Family: String,
  Version: String,
  Hotel: String,
  Grade: String,
  Duration: String,
  City: String
)
def main(args: Array[String]): Unit = {
val spark = SparkSession
.builder()
.appName("Spark SQL")
.master("local")
.config("spark.some.config.option", "some-value")
.getOrCreate()
val rdd = spark.sparkContext.textFile("file:///root/files/part-00000-4ead9570-10e5-44dc-80ad-860cb072a9ff-c000.csv"