保存dataframe到本地或hdfs
# Save a DataFrame as CSV — to HDFS (default FS) or the local filesystem.
# NOTE: the original notes used full-width quotes (“ ”), which are a syntax
# error in Python; replaced with ASCII quotes so the snippets actually run.
df.write.csv("1.csv")            # path resolved against the default filesystem (e.g. HDFS)
df.write.csv("file:/home/dir")   # explicit file: scheme -> local filesystem
# coalesce(1) collapses the DataFrame to a single partition so the
# output directory contains a single part file instead of one per partition.
df.coalesce(1).write.csv("1.csv")
# Explicit spark-csv data source (needed on Spark < 2.0, where CSV was a
# separate com.databricks package — TODO confirm Spark version in use).
df.coalesce(1).write.format("com.databricks.spark.csv").save("/data/home/sample.csv")
# option("header", "true") writes the column names as the first row.
df.coalesce(1).write.format("com.databricks.spark.csv").option("header", "true").save("myfile.csv")
spark dataframe pivot用法
pandas中:pivot_table(df, index=['A', 'B'], columns=['C'], values='D', aggfunc=np.sum)
# Spark equivalent of the pandas pivot_table example above:
# group by A and B, spread distinct values of C into columns, aggregate D with sum.
# (Full-width quotes from the original notes replaced with ASCII quotes.)
df.groupBy("A", "B").pivot("C").sum("D")
# Multiple aggregations at once via agg(); count/sum come from pyspark.sql.functions.
# Removed the stray trailing ", " left in the original call.
df.groupBy("company_industry", "title").agg(count("title"), sum("recruitment_number"))