多个RDD JOIN 之后 去除括号,

package l847164916
import java.sql.{Connection, DriverManager, PreparedStatement}
import java.util.Properties

import org.apache.calcite.avatica.Meta.ConnectionProperties
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SQLContext, SaveMode}
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.types._
import org.apache.spark.{SparkConf, SparkContext}
/**
  * Created by Administrator on 2016/8/31.
  */
object belong_loyalty {
  def main(args: Array[String]): Unit = {
    val url = "jdbc:mysql://192.168.0.79:3306/hyn_profile"
    val prop = new Properties()
    prop.setProperty("user","root")
    prop.setProperty("password","hyn12345")
    prop.setProperty("driver","com.mysql.jdbc.Driver")
    val conf = new SparkConf().setAppName("hivetomysql")
    val sc = new SparkContext(conf)
    val hict = new HiveContext(sc)
    val sqlContext = new SQLContext(sc)
    hict.sql("use dwb")
    //该月公司发帖总数
    val forum_post_cnt = hict.sql("select * from index_work_content where type=14 and start_time between trunc(current_date,\"MM\") and last_day(current_date)").count()
    //该月公司回帖总数
    val forum_reply_cnt = hict.sql("select * from index_work_content where type=15 and start_time between trunc(current_date,\"MM\") and last_day(current_date)").count()
    //该月公司调查参与总数
    val survey_attend_cnt = hict.sql("select * from index_work_content where type=17 and start_time between trunc(current_date,\"MM\") and last_day(current_date)").count()
    //该月公司调查发起总数
    val survey_hold_cnt = hict.sql("select * from index_work_content where type=19 and start_time between trunc(current_date,\"MM\") and last_day(current_date)").count()
    //该月公司投票发起总数
    val vote_hold_cnt = hict.sql("select * from index_work_content where type=20 and start_time between trunc(current_date,\"MM\") and last_day(current_date)").count()
    //发帖参与度:(mail_name,forum_post_rate)
      val forum_post= hict.sql("select mailNickname,count(*) from index_work_content where type=14 and start_time between trunc(current_date,\"MM\") and last_day(current_date) group by mailNickname").map{
        case Row(mail_name:String,cnt:Long)=>
          //发帖参与度=员工发帖次数/公司总发帖次数
          //如果发帖总数为0,则把员工的发帖参与度全置为0
          if(forum_post_cnt!=0){
          val forum_post_rate=cnt*1.0/forum_post_cnt
           (mail_name,forum_post_rate)
             }else{
           (mail_name,0.0)
            }
    }

    //回帖参与度:(mail_name,forum_reply_rate)
    val forum_reply = hict.sql("select mailNickname,count(*) from index_work_content where type=15 and start_time between trunc(current_date,\"MM\") and last_day(current_date) group by mailNickname").map{
      case Row(mail_name:String,cnt:Long)=>
        //回帖参与度=员工回帖次数/公司总回帖次数
        //如果回帖总数为0,则把员工的回帖参与度全置为0
      if(forum_reply_cnt!=0){
        val forum_reply_rate = cnt*1.0/forum_reply_cnt
        (mail_name,forum_reply_rate)
      }else{
        (mail_name,0.0)
      }
    }
    //调查参与度:(mail_name,survey_attend_rate)
    val survey_attend = hict.sql("select mailNickname,count(*) from index_work_content where type=17 and start_time between trunc(current_date,\"MM\") and last_day(current_date) group by mailNickname").map{
      case Row(mail_name:String,cnt:Long)=>
        //调查参与度=员工参与调查次数/公司总调查参与次数
        //如果调查参与总数为0,则把员工的调查参与度全置为0
        if(survey_attend_cnt!=0){
          val survey_attend_rate = cnt*1.0/survey_attend_cnt
          (mail_name,survey_attend_rate)
        }else{
          (mail_name,0.0)
        }
    }
    //调查发起参与度:(mail_name,survey_hold_rate)
    val survey_hold = hict.sql("select mailNickname,count(*) from index_work_content where type=19 and start_time between trunc(current_date,\"MM\") and last_day(current_date) group by mailNickname").map{
      case Row(mail_name:String,cnt:Long)=>
        //调查发起参与度=员工发起调查次数/公司发起调查总次数
        //如果发起调查总数为0,则把员工的调查发起参与度全置为0
        if(survey_hold_cnt!=0){
          val survey_hold_rate = cnt*1.0/survey_hold_cnt
          (mail_name,survey_hold_rate)
        }else{
          (mail_name,0.0)
        }
    }
    //投票发起参与度:(mail_name,vote_hold_rate)
    val vote_hold = hict.sql("select mailNickname,count(*) from index_work_content where type=20 and start_time between trunc(current_date,\"MM\") and last_day(current_date) group by mailNickname").map{
      case Row(mail_name:String,cnt:Long)=>
        //投票发起参与度=员工发起投票次数/公司发起投票总次数
        //如果发起投票总数为0,则把员工的投票发起参与度全置为0
        if(vote_hold_cnt!=0){
          val vote_hold_rate = cnt*1.0/vote_hold_cnt
          (mail_name,vote_hold_rate)
        }else{
          (mail_name,0.0)
        }
    }
    //获取所有的员工信息:(mail_name,cn_name)
    val base_info = hict.sql("select mail_name from index_emp_info").map{
      case Row(mail_name:String)=>
        (mail_name,0.0)
    }


      val belonging_loyalty_rdd1 =
   base_info.leftOuterJoin(forum_post)
  .leftOuterJoin(forum_reply)
  .leftOuterJoin(survey_attend)
  .leftOuterJoin(survey_hold)
  .leftOuterJoin(vote_hold)
    //去除元组括号
    val belonging_loyalty_rdd2 = belonging_loyalty_rdd1.map{
      case ((mail_name,(((((default_value,forum_post_rate),forum_reply_rate),survey_attend_rate),survey_hold_rate),vote_hold_rate))) =>
        (mail_name,forum_post_rate.getOrElse(0.0),forum_reply_rate.getOrElse(0.0),survey_attend_rate.getOrElse(0.0),survey_attend_rate.getOrElse(0.0),vote_hold_rate.getOrElse(0.0))
    }
   // case class belonging_loyalty(mail_name:String,forum_reply_rate:Long,forum_post_rate:Long)
   // val belonging_loyalty_rdd2 = belonging_loyalty_rdd1.map(r=>belonging_loyalty(r._1,r._2._1.toString.toLong,r._2._2.getOrElse(0).toString.toLong))
//      val schema = StructType(
//        StructField("mail_name",StringType) ::
//          StructField("cn_name",StringType)::
//          StructField("forum_post_rate",LongType)::
//        Nil
//      )
     import hict.implicits._
     val belonging_loyalty_DF = belonging_loyalty_rdd2.toDF("mail_name","forum_post_rate","forum_reply_rate","survey_attend_rate","survey_hold_rate","vote_hold_rate")

//    //将DF数据存入mysql
     belonging_loyalty_DF.write.mode(SaveMode.Overwrite).jdbc(url,"belong_loyalty",prop)

  }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值