package l847164916 import java.sql.{Connection, DriverManager, PreparedStatement} import java.util.Properties import org.apache.calcite.avatica.Meta.ConnectionProperties import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SQLContext, SaveMode} import org.apache.spark.sql.hive.HiveContext import org.apache.spark.sql.types._ import org.apache.spark.{SparkConf, SparkContext} /** * Created by Administrator on 2016/8/31. */ object belong_loyalty { def main(args: Array[String]): Unit = { val url = "jdbc:mysql://192.168.0.79:3306/hyn_profile" val prop = new Properties() prop.setProperty("user","root") prop.setProperty("password","hyn12345") prop.setProperty("driver","com.mysql.jdbc.Driver") val conf = new SparkConf().setAppName("hivetomysql") val sc = new SparkContext(conf) val hict = new HiveContext(sc) val sqlContext = new SQLContext(sc) hict.sql("use dwb") //该月公司发帖总数 val forum_post_cnt = hict.sql("select * from index_work_content where type=14 and start_time between trunc(current_date,\"MM\") and last_day(current_date)").count() //该月公司回帖总数 val forum_reply_cnt = hict.sql("select * from index_work_content where type=15 and start_time between trunc(current_date,\"MM\") and last_day(current_date)").count() //该月公司调查参与总数 val survey_attend_cnt = hict.sql("select * from index_work_content where type=17 and start_time between trunc(current_date,\"MM\") and last_day(current_date)").count() //该月公司调查发起总数 val survey_hold_cnt = hict.sql("select * from index_work_content where type=19 and start_time between trunc(current_date,\"MM\") and last_day(current_date)").count() //该月公司投票发起总数 val vote_hold_cnt = hict.sql("select * from index_work_content where type=20 and start_time between trunc(current_date,\"MM\") and last_day(current_date)").count() //发帖参与度:(mail_name,forum_post_rate) val forum_post= hict.sql("select mailNickname,count(*) from index_work_content where type=14 and start_time between trunc(current_date,\"MM\") and last_day(current_date) group by mailNickname").map{ case Row(mail_name:String,cnt:Long)=> //发帖参与度=员工发帖次数/公司总发帖次数 //如果发帖总数为0,则把员工的发帖参与度全置为0 if(forum_post_cnt!=0){ val forum_post_rate=cnt*1.0/forum_post_cnt (mail_name,forum_post_rate) }else{ (mail_name,0.0) } } //回帖参与度:(mail_name,forum_reply_rate) val forum_reply = hict.sql("select mailNickname,count(*) from index_work_content where type=15 and start_time between trunc(current_date,\"MM\") and last_day(current_date) group by mailNickname").map{ case Row(mail_name:String,cnt:Long)=> //回帖参与度=员工回帖次数/公司总回帖次数 //如果回帖总数为0,则把员工的回帖参与度全置为0 if(forum_reply_cnt!=0){ val forum_reply_rate = cnt*1.0/forum_reply_cnt (mail_name,forum_reply_rate) }else{ (mail_name,0.0) } } //调查参与度:(mail_name,survey_attend_rate) val survey_attend = hict.sql("select mailNickname,count(*) from index_work_content where type=17 and start_time between trunc(current_date,\"MM\") and last_day(current_date) group by mailNickname").map{ case Row(mail_name:String,cnt:Long)=> //调查参与度=员工参与调查次数/公司总调查参与次数 //如果调查参与总数为0,则把员工的调查参与度全置为0 if(survey_attend_cnt!=0){ val survey_attend_rate = cnt*1.0/survey_attend_cnt (mail_name,survey_attend_rate) }else{ (mail_name,0.0) } } //调查发起参与度:(mail_name,survey_hold_rate) val survey_hold = hict.sql("select mailNickname,count(*) from index_work_content where type=19 and start_time between trunc(current_date,\"MM\") and last_day(current_date) group by mailNickname").map{ case Row(mail_name:String,cnt:Long)=> //调查发起参与度=员工发起调查次数/公司发起调查总次数 //如果发起调查总数为0,则把员工的调查发起参与度全置为0 if(survey_hold_cnt!=0){ val survey_hold_rate = cnt*1.0/survey_hold_cnt (mail_name,survey_hold_rate) }else{ (mail_name,0.0) } } //投票发起参与度:(mail_name,vote_hold_rate) val vote_hold = hict.sql("select mailNickname,count(*) from index_work_content where type=20 and start_time between trunc(current_date,\"MM\") and last_day(current_date) group by mailNickname").map{ case Row(mail_name:String,cnt:Long)=> //投票发起参与度=员工发起投票次数/公司发起投票总次数 //如果发起投票总数为0,则把员工的投票发起参与度全置为0 if(vote_hold_cnt!=0){ val vote_hold_rate = cnt*1.0/vote_hold_cnt (mail_name,vote_hold_rate) }else{ (mail_name,0.0) } } //获取所有的员工信息:(mail_name,cn_name) val base_info = hict.sql("select mail_name from index_emp_info").map{ case Row(mail_name:String)=> (mail_name,0.0) } val belonging_loyalty_rdd1 = base_info.leftOuterJoin(forum_post) .leftOuterJoin(forum_reply) .leftOuterJoin(survey_attend) .leftOuterJoin(survey_hold) .leftOuterJoin(vote_hold) //去除元组括号 val belonging_loyalty_rdd2 = belonging_loyalty_rdd1.map{ case ((mail_name,(((((default_value,forum_post_rate),forum_reply_rate),survey_attend_rate),survey_hold_rate),vote_hold_rate))) => (mail_name,forum_post_rate.getOrElse(0.0),forum_reply_rate.getOrElse(0.0),survey_attend_rate.getOrElse(0.0),survey_attend_rate.getOrElse(0.0),vote_hold_rate.getOrElse(0.0)) } // case class belonging_loyalty(mail_name:String,forum_reply_rate:Long,forum_post_rate:Long) // val belonging_loyalty_rdd2 = belonging_loyalty_rdd1.map(r=>belonging_loyalty(r._1,r._2._1.toString.toLong,r._2._2.getOrElse(0).toString.toLong)) // val schema = StructType( // StructField("mail_name",StringType) :: // StructField("cn_name",StringType):: // StructField("forum_post_rate",LongType):: // Nil // ) import hict.implicits._ val belonging_loyalty_DF = belonging_loyalty_rdd2.toDF("mail_name","forum_post_rate","forum_reply_rate","survey_attend_rate","survey_hold_rate","vote_hold_rate") // //将DF数据存入mysql belonging_loyalty_DF.write.mode(SaveMode.Overwrite).jdbc(url,"belong_loyalty",prop) } }
多个RDD JOIN 之后 去除括号,
最新推荐文章于 2021-01-28 18:55:23 发布