# Read the base-info CSV, derive a broad industry code (big_hy_code) from the
# raw industry code (hydm) of each record via get_big_hy_code, and write the
# result back out as a single CSV file under myfeature_path.
# NOTE: os.path.join replaces the original string concatenation so the read
# path no longer depends on myfeature_path carrying a trailing slash.
dfh1 = sqlContext.read.csv(os.path.join(myfeature_path, 'base_info_name.csv'), header=True)
dfh1.show()
dfh1.createOrReplaceTempView('h1')
dfh2 = dfh1.rdd.map(lambda row: Row(name=row['name'],
                                    province=row['province'],
                                    hydm=row['hydm'],
                                    clrq=row['clrq'],
                                    big_hy_code=get_big_hy_code(row['hydm'])))
# repartition(1) so Spark emits one part-file inside the output directory.
dfh2.repartition(1).toDF().write.csv(os.path.join(myfeature_path, "base_bighycode.csv"),
                                     mode='overwrite', header=True)
spark.stop()
# For each row of dfkk2, keep columns _1.._10 unchanged and map column _11
# through the_second_hy_map (second-level industry name lookup — TODO confirm
# semantics against its definition), then write the result as a single CSV.
# Original snippet was syntactically invalid: a stray leading backtick and
# misplaced parentheses left repartition/write inside the map(...) call.
(dfkk2.rdd
      .map(lambda row: Row(row['_1'], row['_2'], row['_3'], row['_4'],
                           row['_5'], row['_6'], row['_7'], row['_8'],
                           row['_9'], row['_10'],
                           the_second_hy_map(row['_11'])))
      .repartition(1)  # single output part-file
      .toDF()
      .write.csv(os.path.join(hdfsSaveStats, "the_last_second_update_bighyname.csv"),
                 mode='overwrite', header=True))
# Scrape artifact (blog footer, not code): "Latest recommended article published 2024-08-01 09:26:14"