1. 统计表中有多少数据
- 导入数据
// Load every CSV file matching the smartdata* glob under /home/hadoop on the
// local filesystem (file:// scheme). The "header" option makes the first line
// of the input supply the column names; the "encoding" option sets UTF-8 decoding.
val dfraw = {
  // Wrapped in a block so the multi-line chain is parsed as a single
  // expression even when pasted into an interactive shell.
  val csvReader = spark.read
    .format("csv")
    .option("header", value = true)
    .option("encoding", "utf-8")
  csvReader.load("file:///home/hadoop/smartdata*")
}
- 修改全部列名(改为英文)
// English column names, listed in the positional order in which they are
// applied to the columns of dfraw.
// NOTE(review): assumes the loaded CSV has exactly four columns — confirm.
val dfraw01 = Seq(
  "cardid",
  "time",
  "station",
  "type"
)
// Rename all columns of dfraw at once by expanding the name list as varargs.
val df = dfraw.toDF(dfraw01: _*)
- 统一站点名称
def replaceStationName(station:String):String ={
var dststation=station
if(!station.endsWith("站"))
dststation = station + "站"
else
dststation = dststation
if(dststation.equals("马鞍山站"))