实现
# Compute the earliest and latest values of the "captureTime" column.
# Import only the names actually used: a star import from
# pyspark.sql.functions would shadow the builtins `min` and `max`
# for the rest of the file, which is a common source of subtle bugs.
from pyspark.sql.functions import col, min as sql_min, max as sql_max

# df_tmp is a DataFrame (defined elsewhere) containing a "captureTime" column.
df_tmp.agg(sql_min(col("captureTime")), sql_max(col("captureTime"))).show()
结果
读取忽略空格
# Read a CSV file while stripping whitespace inside each field:
#   ignoreLeadingWhiteSpace  -> drop whitespace before the field value
#   ignoreTrailingWhiteSpace -> drop whitespace after the field value
df = (
    sqlContext.read
    .option("header", True)
    .option("ignoreLeadingWhiteSpace", True)
    .option("ignoreTrailingWhiteSpace", True)
    .option("encoding", "utf-8")
    .csv(path_yitiji)
)