【基于Spark-SQL模块完成项目分析】

10 篇文章 0 订阅

Spark-SQL模块

需求

基于Spark SQL完成需求开发

  • 婚姻状况对合同签订的影响
  • 联系方式对合同签订的影响
  • 季节变换对合同签订的影响

返回目录

创建数据表

-- Result tables for the Spark SQL marketing analysis (MySQL).

-- Successful contract count grouped by marital status.
CREATE TABLE `mari_con_stat` (
  `marital` VARCHAR(20) DEFAULT NULL,
  `num`     INT(11)     DEFAULT NULL
);

-- Successful contract count grouped by contact method.
CREATE TABLE `contact_con_stat` (
  `contact` VARCHAR(20) DEFAULT NULL,
  `num`     INT(11)     DEFAULT NULL
);

-- Successful contract count grouped by month.
CREATE TABLE `month_con_stat` (
  `month` VARCHAR(20) DEFAULT NULL,
  `num`   INT(11)     DEFAULT NULL
);

返回目录

IDEA中编码开发

sparksql.MaritalContractStatSql

package sparksql

import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * @Classname MaritalContractStatSql
 * @Description Impact of marital status on contract signing (marketing outcome)
 * @Date 2022/10/7 16:42
 * @Created by Tiger_Li
 */

/**
 * Counts successful marketing outcomes (poutcome = 'success') grouped by
 * marital status. Reads from the Hive table dwd.dwd_t_bank and writes the
 * aggregate to the MySQL table mari_con_stat.
 *
 * Command-line arguments:
 *   args(0) - Spark master URL (e.g. yarn, local[*], spark://host:7077)
 *   args(1) - JDBC save mode: overwrite | append | ignore
 */
object MaritalContractStatSql {

  def main(args: Array[String]): Unit = {

    if (args.length != 2) {
      println("Usage: spark-submit * *.jar masterurl SaveMode[overwrite|append|ignore]")
      System.exit(1)
    }

    // 1. Build SparkConf with the master URL and application name.
    val masterUrl = args(0)
    val mySaveMode = args(1)
    val conf = new SparkConf()
      .setAppName(this.getClass.getSimpleName)
      .setMaster(masterUrl)

    // 2. Create a SparkSession with Hive support.
    val spark = SparkSession.builder().enableHiveSupport().config(conf).getOrCreate()

    // 3. Read the source rows from Hive, dropping empty/null marital values.
    val dataSource = spark.sql("select marital, poutcome from dwd.dwd_t_bank where marital !='' and marital is not null")

    // 4. Register as a temporary view for SQL processing.
    dataSource.createOrReplaceTempView("t_bank_temp")

    // 5. Count successful outcomes per marital status.
    val resdf: DataFrame = spark.sql("select marital,count(*) as num from t_bank_temp where poutcome = 'success' group by marital")

    // 6. Write the aggregate to MySQL via JDBC.
    // SECURITY NOTE: credentials are hard-coded here; move them to a config
    // file or environment variables before any production deployment.
    val url = "jdbc:mysql://node1:3306/bigdata19?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC&useSSL=false"
    val table = "mari_con_stat"
    val prop = new java.util.Properties()
    prop.setProperty("driver", "com.mysql.cj.jdbc.Driver")
    prop.setProperty("user", "lh")
    prop.setProperty("password", "ypassword")
    resdf.write.mode(mySaveMode).jdbc(url, table, prop)

    spark.stop()
  }

}


返回目录

sparksql.ContactContractStatSql

package sparksql

import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * @Classname ContactContractStatSql
 * @Description Impact of contact method on contract signing (marketing outcome)
 * @Date 2022/10/7 16:42
 * @Created by Tiger_Li
 */

/**
 * Counts successful marketing outcomes (poutcome = 'success') grouped by
 * contact method. Reads from the Hive table dwd.dwd_t_bank and writes the
 * aggregate to the MySQL table contact_con_stat.
 *
 * Command-line arguments:
 *   args(0) - Spark master URL (e.g. yarn, local[*], spark://host:7077)
 *   args(1) - JDBC save mode: overwrite | append | ignore
 */
object ContactContractStatSql {

  def main(args: Array[String]): Unit = {

    if (args.length != 2) {
      println("Usage: spark-submit * *.jar masterurl SaveMode[overwrite|append|ignore]")
      System.exit(1)
    }

    // 1. Build SparkConf with the master URL and application name.
    val masterUrl = args(0)
    val mySaveMode = args(1)
    val conf = new SparkConf()
      .setAppName(this.getClass.getSimpleName)
      .setMaster(masterUrl)

    // 2. Create a SparkSession with Hive support.
    val spark = SparkSession.builder().enableHiveSupport().config(conf).getOrCreate()

    // 3. Read the source rows from Hive, dropping empty/null contact values.
    val dataSource = spark.sql("select contact, poutcome from dwd.dwd_t_bank where contact !='' and contact is not null")

    // 4. Register as a temporary view for SQL processing.
    dataSource.createOrReplaceTempView("t_bank_temp")

    // 5. Count successful outcomes per contact method.
    val resdf: DataFrame = spark.sql("select contact,count(*) as num from t_bank_temp where poutcome = 'success' group by contact")

    // 6. Write the aggregate to MySQL via JDBC.
    // SECURITY NOTE: credentials are hard-coded here; move them to a config
    // file or environment variables before any production deployment.
    val url = "jdbc:mysql://node1:3306/bigdata19?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC&useSSL=false"
    val table = "contact_con_stat"
    val prop = new java.util.Properties()
    prop.setProperty("driver", "com.mysql.cj.jdbc.Driver")
    prop.setProperty("user", "lh")
    prop.setProperty("password", "ypassword")
    resdf.write.mode(mySaveMode).jdbc(url, table, prop)

    spark.stop()
  }

}

返回目录

sparksql.monthContractStatSql

package sparksql

import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * @Classname monthContractStatSql
 * @Description Impact of month (seasonality) on contract signing (marketing outcome)
 * @Date 2022/10/7 16:42
 * @Created by Tiger_Li
 */

/**
 * Counts successful marketing outcomes (poutcome = 'success') grouped by
 * month. Reads from the Hive table dwd.dwd_t_bank and writes the aggregate
 * to the MySQL table month_con_stat.
 *
 * Command-line arguments:
 *   args(0) - Spark master URL (e.g. yarn, local[*], spark://host:7077)
 *   args(1) - JDBC save mode: overwrite | append | ignore
 */
object monthContractStatSql {

  def main(args: Array[String]): Unit = {

    if (args.length != 2) {
      println("Usage: spark-submit * *.jar masterurl SaveMode[overwrite|append|ignore]")
      System.exit(1)
    }

    // 1. Build SparkConf with the master URL and application name.
    val masterUrl = args(0)
    val mySaveMode = args(1)
    val conf = new SparkConf()
      .setAppName(this.getClass.getSimpleName)
      .setMaster(masterUrl)

    // 2. Create a SparkSession with Hive support.
    val spark = SparkSession.builder().enableHiveSupport().config(conf).getOrCreate()

    // 3. Read the source rows from Hive, dropping empty/null month values.
    //    (Leftover debug show()/printSchema() calls were removed for
    //    consistency with the sibling Marital/Contact jobs.)
    val dataSource = spark.sql("select month_of_year as month, poutcome from dwd.dwd_t_bank where month_of_year !='' and month_of_year is not null")

    // 4. Register as a temporary view for SQL processing.
    dataSource.createOrReplaceTempView("t_bank_temp")

    // 5. Count successful outcomes per month.
    val resdf: DataFrame = spark.sql("select month,count(*) as num from t_bank_temp where poutcome = 'success' group by month")

    // 6. Write the aggregate to MySQL via JDBC.
    // SECURITY NOTE: credentials are hard-coded here; move them to a config
    // file or environment variables before any production deployment.
    val url = "jdbc:mysql://node1:3306/bigdata19?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC&useSSL=false"
    val table = "month_con_stat"
    val prop = new java.util.Properties()
    prop.setProperty("driver", "com.mysql.cj.jdbc.Driver")
    prop.setProperty("user", "lh")
    prop.setProperty("password", "ypassword")
    resdf.write.mode(mySaveMode).jdbc(url, table, prop)

    spark.stop()
  }

}

完成代码编写后通过maven打包生成jar包
在这里插入图片描述

返回目录

部署运行

启动hadoop,启动hive,启动spark

# Start HDFS, YARN and the MapReduce job-history server.
HADOOP_SBIN=/opt/soft_installed/hadoop-2.7.3/sbin
"${HADOOP_SBIN}/start-dfs.sh"
"${HADOOP_SBIN}/start-yarn.sh"
"${HADOOP_SBIN}/mr-jobhistory-daemon.sh" start historyserver

# Start the Hive metastore in the background, logging under $HIVE_LOG_DIR.
nohup hive --service metastore >$HIVE_LOG_DIR/metastore.log 2>&1 &

# Start the Spark standalone daemons and the Spark history server.
SPARK_SBIN=/opt/soft_installed/spark-2.4.8-bin-hadoop2.7/sbin
"${SPARK_SBIN}/start-all.sh"
"${SPARK_SBIN}/start-history-server.sh"

创建shell脚本运行spark SQL版本的营销分析

# Impact of marital status on marketing
# Main class: sparksql.MaritalContractStatSql
# FIX: the application passes args(0) to setMaster(); since we submit with
# --master yarn, args(0) must be "yarn" as well — the original value
# spark://node1:7077 conflicts with YARN cluster mode.
# FIX: --jars added so the MySQL connector is shipped to the YARN
# cluster-mode driver and executors (driver-class-path alone only works
# when the jar already exists at that path on every node).
/opt/soft_installed/spark-2.4.8-bin-hadoop2.7/bin/spark-submit \
--name MCSSQL \
--class sparksql.MaritalContractStatSql \
--master yarn \
--deploy-mode cluster \
--driver-memory 512m \
--driver-cores 1 \
--executor-memory 512m \
--num-executors 1 \
--executor-cores 1 \
--jars /home/lh/softs/mysql-connector-java-8.0.30.jar \
--driver-class-path /home/lh/softs/mysql-connector-java-8.0.30.jar \
/home/lh/hadooptest/jars/GUNbank-V2-jar-with-dependencies.jar \
yarn overwrite

# Impact of contact method on marketing (original comment had a typo:
# 练习方式 should be 联系方式 / "contact method")
# Main class: sparksql.ContactContractStatSql
# FIX: args(0) must match --master (yarn), not spark://node1:7077.
# FIX: --jars ships the MySQL connector to the YARN cluster-mode
# driver and executors.
/opt/soft_installed/spark-2.4.8-bin-hadoop2.7/bin/spark-submit \
--name CCSSQL \
--class sparksql.ContactContractStatSql \
--master yarn \
--deploy-mode cluster \
--driver-memory 512m \
--driver-cores 1 \
--executor-memory 512m \
--num-executors 1 \
--executor-cores 1 \
--jars /home/lh/softs/mysql-connector-java-8.0.30.jar \
--driver-class-path /home/lh/softs/mysql-connector-java-8.0.30.jar \
/home/lh/hadooptest/jars/GUNbank-V2-jar-with-dependencies.jar \
yarn overwrite

# Impact of month on marketing
# Main class: sparksql.monthContractStatSql
# FIX: args(0) must match --master (yarn), not spark://node1:7077.
# FIX: --jars ships the MySQL connector to the YARN cluster-mode
# driver and executors.
/opt/soft_installed/spark-2.4.8-bin-hadoop2.7/bin/spark-submit \
--name MonthCSSQL \
--class sparksql.monthContractStatSql \
--master yarn \
--deploy-mode cluster \
--driver-memory 512m \
--driver-cores 1 \
--executor-memory 512m \
--num-executors 1 \
--executor-cores 1 \
--jars /home/lh/softs/mysql-connector-java-8.0.30.jar \
--driver-class-path /home/lh/softs/mysql-connector-java-8.0.30.jar \
/home/lh/hadooptest/jars/GUNbank-V2-jar-with-dependencies.jar \
yarn overwrite

返回目录

结果检查

Spark history

Job
在这里插入图片描述

stage
在这里插入图片描述

executors
在这里插入图片描述

返回目录

数据库

连接数据库检查
在这里插入图片描述

返回目录

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值