Project Analysis Based on the Spark Core Module

Requirements:

  • The effect of age on contract signing
  • The effect of personal loans on contract signing
  • The effect of education level on contract signing
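
All three analyses share the same shape: keep the records whose marketing outcome (poutcome) is "success", key them by the attribute under study, and count per key. Below is a minimal sketch of that shared pattern; the helper name countSuccessesBy is hypothetical, not part of the project.

import scala.reflect.ClassTag
import org.apache.spark.rdd.RDD

// Sketch of the shared pattern: count successful outcomes grouped by an attribute.
// countSuccessesBy is a hypothetical helper name, not from the original project.
def countSuccessesBy[K: ClassTag](records: RDD[(K, String)]): RDD[(K, Int)] =
  records
    .filter(_._2 == "success")   // keep only successful marketing outcomes
    .map(t => (t._1, 1))         // key by the attribute under study
    .reduceByKey(_ + _)          // count per key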

Creating the tables in the database

create table `age_constrate_stat`(
`age` varchar(20) default null,
`num` int(11) default null
);

create table `loan_constrate_stat`(
`loan` varchar(20) default null,
`num` int(11) default null
);

create table `edu_constrate_stat`(
`education` varchar(20) default null,
`num` int(11) default null
);


Developing the Spark Core code in IDEA

DbUtil: loading the processed data into MySQL

import java.sql.{Connection, DriverManager, PreparedStatement}

/**
 * @Classname DbUtil
 * @Description JDBC utility for loading the aggregated results into MySQL
 * @Date 2022/10/7 19:08
 * @Created by Tiger_Li
 */
object DbUtil {
  def loadConStat2MySQL(ite: Iterator[(String, Int)]): Unit = {
    // Load the MySQL JDBC driver
    Class.forName("com.mysql.cj.jdbc.Driver")
    // Database connection
    var conn: Connection = null
    // PreparedStatement used to write the rows into MySQL
    var ps: PreparedStatement = null
    val sql = "insert into loan_constrate_stat(loan, num) values(?, ?)"

    try {
      conn = DriverManager.getConnection("jdbc:mysql://node1:3306/bigdata19?createDatabaseIfNotExist=true&useSSL=false", "lh", "Ypassword")
      // Prepare the statement once and reuse it for every row in the partition
      ps = conn.prepareStatement(sql)
      ite.foreach(t => {
        ps.setString(1, t._1)
        ps.setInt(2, t._2)
        ps.executeUpdate()
      })
    } catch {
      case e: Exception => println(e.getMessage)
    } finally {
      if (ps != null) {
        ps.close()
      }
      if (conn != null) {
        conn.close()
      }
    }
  }

  def ageConStat2MySQL(ite: Iterator[(Int, Int)]): Unit = {
    // Load the MySQL JDBC driver
    Class.forName("com.mysql.cj.jdbc.Driver")
    // Database connection
    var conn: Connection = null
    // PreparedStatement used to write the rows into MySQL
    var ps: PreparedStatement = null
    val sql = "insert into age_constrate_stat(age, num) values(?, ?)"

    try {
      conn = DriverManager.getConnection("jdbc:mysql://node1:3306/bigdata19?createDatabaseIfNotExist=true&useSSL=false", "lh", "Ypassword")
      // Prepare the statement once and reuse it for every row in the partition
      ps = conn.prepareStatement(sql)
      ite.foreach(t => {
        ps.setInt(1, t._1)
        ps.setInt(2, t._2)
        ps.executeUpdate()
      })
    } catch {
      case e: Exception => println(e.getMessage)
    } finally {
      if (ps != null) {
        ps.close()
      }
      if (conn != null) {
        conn.close()
      }
    }
  }
}
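
The per-row executeUpdate above issues one database round trip per record. A minimal alternative sketch, assuming the same table and credentials, uses JDBC batching with manual commits; DbUtilBatched and batchSize are my own names, not part of the project.

import java.sql.{Connection, DriverManager, PreparedStatement}

// Sketch: batched variant of loadConStat2MySQL (table and credentials assumed as above).
object DbUtilBatched {
  def loadConStat2MySQLBatched(ite: Iterator[(String, Int)], batchSize: Int = 500): Unit = {
    Class.forName("com.mysql.cj.jdbc.Driver")
    var conn: Connection = null
    var ps: PreparedStatement = null
    try {
      conn = DriverManager.getConnection(
        "jdbc:mysql://node1:3306/bigdata19?useSSL=false", "lh", "Ypassword")
      conn.setAutoCommit(false)  // commit per batch rather than per row
      ps = conn.prepareStatement("insert into loan_constrate_stat(loan, num) values(?, ?)")
      var count = 0
      ite.foreach { case (loan, num) =>
        ps.setString(1, loan)
        ps.setInt(2, num)
        ps.addBatch()            // queue the row
        count += 1
        if (count % batchSize == 0) {
          ps.executeBatch()      // flush a full batch
          conn.commit()
        }
      }
      ps.executeBatch()          // flush the remainder
      conn.commit()
    } finally {
      if (ps != null) ps.close()
      if (conn != null) conn.close()
    }
  }
}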



Age2ConStat: the effect of age on marketing

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

/**
 * @Classname Age2ConStat
 * @Description Analysis of the effect of age on marketing
 * @Date 2022/10/7 16:42
 * @Created by Tiger_Li
 */
object Age2ConStat {

  def main(args: Array[String]): Unit = {
    // 1. Create the SparkConf and set the master and app name
    val conf = new SparkConf().setAppName(s"${this.getClass.getSimpleName}").setMaster("spark://node1:7077")
    // 2. Create the SparkSession with Hive support
    val spark = SparkSession.builder().enableHiveSupport().config(conf).getOrCreate()
    // 3. Read the Hive source table
    val dataSource = spark.sql("select age, poutcome from dwd.dwd_t_bank")
    // 4. Convert to an RDD of (age, poutcome)
    import spark.implicits._
    val dataRDD = dataSource.map(row => {
      val age = row.getAs[Int]("age")
      val poutcome = row.getAs[String]("poutcome")
      (age, poutcome)
    }).rdd
    // 5. Keep only successful outcomes with a valid age
    val filterRDD = dataRDD.filter(t => t._1 > 0 && t._2 == "success")
    // 6. Map to (age, 1) for counting
    val mapRDD = filterRDD.map(t => (t._1, 1))
    // 7. Aggregate the counts per age
    val reducedRDD = mapRDD.reduceByKey(_ + _)
    // 8. Write each partition to MySQL
    reducedRDD.foreachPartition(iter => DbUtil.ageConStat2MySQL(iter))
    // 9. Release resources
    spark.stop()
  }
}
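
One detail worth noting: age_constrate_stat.age is declared varchar(20), while this job writes raw integer ages. If age ranges are preferred, the sketch below groups ages into labeled buckets first; the boundaries and the helper name ageBucket are my own illustrative assumptions, and a String-keyed writer for age_constrate_stat (analogous to loadConStat2MySQL) would be needed.

// Sketch: bucket raw ages into labeled ranges before aggregating.
// The boundaries below are illustrative assumptions, not from the original project.
def ageBucket(age: Int): String = age match {
  case a if a < 25 => "under 25"
  case a if a < 35 => "25-34"
  case a if a < 45 => "35-44"
  case a if a < 55 => "45-54"
  case _           => "55+"
}

// Reusing filterRDD from step 5 above:
// val bucketCounts = filterRDD.map(t => (ageBucket(t._1), 1)).reduceByKey(_ + _)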



LoanOnConStat: the effect of loans on marketing

import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * @Classname LoanOnConStat
 * @Description Analysis of the effect of loans on marketing
 * @Date 2022/10/7 21:25
 * @Created by Tiger_Li
 */
object LoanOnConStat {

  def main(args: Array[String]): Unit = {
    // 1. Create the SparkConf and set the master and app name
    val conf: SparkConf = new SparkConf().setAppName(s"${this.getClass.getSimpleName}").setMaster("spark://node1:7077")
    // 2. Create the SparkSession with Hive support
    val spark: SparkSession = SparkSession.builder().enableHiveSupport().config(conf).getOrCreate()
    // 3. Read the Hive source table (no trailing semicolon: spark.sql rejects it)
    val dataFrame: DataFrame = spark.sql("select loan, poutcome from dwd.dwd_t_bank where loan != '' and loan is not null")
    // 4. Convert to an RDD of (loan, poutcome)
    import spark.implicits._
    val rddData: RDD[(String, String)] = dataFrame.map(row => {
      val loan = row.getAs[String]("loan")
      val poutcome = row.getAs[String]("poutcome")
      (loan, poutcome)
    }).rdd
    // 5. Keep only successful outcomes
    val filterRDD: RDD[(String, String)] = rddData.filter(_._2 == "success")
    // 6. Map to (loan, 1) for counting
    val mappedRDD: RDD[(String, Int)] = filterRDD.map(t => (t._1, 1))
    // 7. Aggregate the counts per loan status
    val reducedRDD: RDD[(String, Int)] = mappedRDD.reduceByKey(_ + _)
    // 8. Write each partition to MySQL
    reducedRDD.foreachPartition(ite => DbUtil.loadConStat2MySQL(ite))
    // 9. Release resources
    spark.stop()
  }
}
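
To sanity-check the filter/map/reduce logic without a cluster or the Hive table, here is a minimal local sketch; local mode and the in-memory sample are my own test assumptions.

import org.apache.spark.sql.SparkSession

// Sketch: verify the counting logic locally on a hand-made sample.
object LoanOnConStatLocalCheck {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("LoanOnConStatLocalCheck").master("local[*]").getOrCreate()
    val sample = spark.sparkContext.parallelize(Seq(
      ("yes", "success"), ("no", "success"), ("no", "failure"), ("no", "success")
    ))
    val counts = sample.filter(_._2 == "success").map(t => (t._1, 1)).reduceByKey(_ + _)
    counts.collect().foreach(println)  // expected: (yes,1) and (no,2)
    spark.stop()
  }
}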


EduOnConStat: the effect of education on marketing

import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * @Classname EduOnConStat
 * @Description Analysis of the effect of education on marketing
 * @Date 2022/10/7 16:42
 * @Created by Tiger_Li
 */
object EduOnConStat {

  def main(args: Array[String]): Unit = {

    if (args.length != 2) {
      println("Usage: spark-submit ... <jar> <masterUrl> <SaveMode: overwrite|append|ignore>")
      System.exit(1)
    }

    // 1. Create the SparkConf and set the master and app name
    val masterUrl = args(0)
    val mySaveMode = args(1)
    val conf = new SparkConf().setAppName(s"${this.getClass.getSimpleName}").setMaster(masterUrl)
    // 2. Create the SparkSession with Hive support
    val spark = SparkSession.builder().enableHiveSupport().config(conf).getOrCreate()
    // 3. Read the Hive source table
    val dataSource = spark.sql("select education, poutcome from dwd.dwd_t_bank where education != '' and education is not null")
    // 4. Keep only successful outcomes, then count per education level
    val filterDF = dataSource.filter("poutcome == 'success'")
    val resDF: DataFrame = filterDF.groupBy("education").agg("education" -> "count").withColumnRenamed("count(education)", "num")
    // 5. Write the result to MySQL via the DataFrame JDBC writer
    val url = "jdbc:mysql://node1:3306/bigdata19?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC&useSSL=false"
    val table = "edu_constrate_stat"
    val prop = new java.util.Properties()
    prop.put("driver", "com.mysql.cj.jdbc.Driver")
    prop.put("user", "lh")
    prop.put("password", "Ypassword")
    resDF.write.mode(mySaveMode).jdbc(url, table, prop)
    // 6. Release resources
    spark.stop()
  }
}
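
The groupBy/agg pipeline can also be expressed as a single SQL statement. Here is a sketch of the equivalent query, assuming the SparkSession spark created in step 2 above (placed before spark.stop()):

// Sketch: the same education aggregation as one Spark SQL query.
val resDFsql = spark.sql(
  """
    |select education, count(education) as num
    |from dwd.dwd_t_bank
    |where education != '' and education is not null
    |  and poutcome = 'success'
    |group by education
  """.stripMargin)
// resDFsql can then be written with the same write.mode(...).jdbc(...) call.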



Deployment and execution

vim product_market_stat.sh

#! /bin/bash
# Age2ConStatv2
/opt/soft_installed/spark-2.4.8-bin-hadoop2.7/bin/spark-submit \
--class Age2ConStatv2 \
--master yarn \
--deploy-mode cluster \
--driver-memory 512m \
--driver-cores 1 \
--executor-memory 512m \
--num-executors 1 \
--executor-cores 1 \
--driver-class-path /home/lh/softs/mysql-connector-java-8.0.30.jar \
/home/lh/hadooptest/jars/GUNbank-V2-jar-with-dependencies.jar \
spark://node1:7077 overwrite 


# EduOnConStat
/opt/soft_installed/spark-2.4.8-bin-hadoop2.7/bin/spark-submit \
--name eduStatSParkcore \
--class EduOnConStat \
--master yarn \
--deploy-mode cluster \
--driver-memory 512m \
--driver-cores 1 \
--executor-memory 512m \
--num-executors 1 \
--executor-cores 1 \
--driver-class-path /home/lh/softs/mysql-connector-java-8.0.30.jar \
/home/lh/hadooptest/jars/GUNbank-V2-jar-with-dependencies.jar \
spark://node1:7077 overwrite


# loanOnConStat
/opt/soft_installed/spark-2.4.8-bin-hadoop2.7/bin/spark-submit \
--name myloanOnConStat \
--class LoanOnConStat \
--master yarn \
--deploy-mode cluster \
--driver-memory 512m \
--driver-cores 1 \
--executor-memory 512m \
--num-executors 1 \
--executor-cores 1 \
--driver-class-path /home/lh/softs/mysql-connector-java-8.0.30.jar \
/home/lh/hadooptest/jars/GUNbank-V2-jar-with-dependencies.jar \
spark://node1:7077 overwrite
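
After the jobs finish, a quick way to verify the loads is to read the tables back. A minimal sketch, assuming a spark-shell session (providing spark) with the MySQL connector on the classpath and the same credentials as above:

// Sketch: read each result table back from MySQL and print it.
val prop = new java.util.Properties()
prop.put("driver", "com.mysql.cj.jdbc.Driver")
prop.put("user", "lh")
prop.put("password", "Ypassword")
val url = "jdbc:mysql://node1:3306/bigdata19?useSSL=false"

Seq("age_constrate_stat", "loan_constrate_stat", "edu_constrate_stat").foreach { table =>
  spark.read.jdbc(url, table, prop).show()
}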


Results

[Result screenshots were shown here.]
