# Print the schema (column names and types) of the source DataFrame.
# NOTE(review): `df` is not defined in this section — presumably a Spark
# DataFrame loaded earlier in the file; confirm.
df.printSchema()
# Reshape the DataFrame schema: cast column types and rename columns.
#
# The chain is wrapped in parentheses so it can legally span several lines,
# and column names use plain ASCII quotes (typographic quotes are not valid
# string delimiters in Python).
#
# NOTE(review): assumes IntegerType, LongType and StringType are imported
# from pyspark.sql.types elsewhere in the file, and that `df` is the
# DataFrame defined above this section — confirm.
raw_sample_df = (
    df
    # user: cast to integer, then expose as "userId".
    .withColumn("user", df.user.cast(IntegerType()))
    .withColumnRenamed("user", "userId")
    # time_stamp: cast to long, then expose as "timestamp".
    .withColumn("time_stamp", df.time_stamp.cast(LongType()))
    .withColumnRenamed("time_stamp", "timestamp")
    # adgroup_id: cast to integer, then expose as "adgroupId".
    .withColumn("adgroup_id", df.adgroup_id.cast(IntegerType()))
    .withColumnRenamed("adgroup_id", "adgroupId")
    # The remaining columns keep their names; only the types change.
    .withColumn("pid", df.pid.cast(StringType()))
    .withColumn("nonclk", df.nonclk.cast(IntegerType()))
    .withColumn("clk", df.clk.cast(IntegerType()))
)

# Print the resulting schema and a preview of the rows for a visual check.
raw_sample_df.printSchema()
raw_sample_df.show()