spark案例-词频统计:存储数据库

1.读数据库数据示例

  /**
   * Reads the `wind_speed` column from the MySQL `weather` table via Spark JDBC.
   *
   * @return a DataFrame with a single `wind_speed` column
   */
  def getDataForDb(): DataFrame = {
    // Local SparkSession; a single shuffle partition is enough for this small job.
    val spark: SparkSession = SparkSession.builder()
      .master("local")
      .appName("getData")
      .config("spark.sql.shuffle.partitions", 1)
      .getOrCreate()

    // JDBC connection properties.
    // NOTE(review): credentials are hard-coded; consider moving them to configuration.
    val properties: Properties = new Properties()
    properties.setProperty("user", "root")
    properties.setProperty("password", "123456")
    // FIX: with mysql-connector-java 8.x (see pom), the driver class is
    // com.mysql.cj.jdbc.Driver; the old com.mysql.jdbc.Driver is deprecated.
    properties.setProperty("driver", "com.mysql.cj.jdbc.Driver")

    // Wrapping the query as a derived table pushes the projection down to MySQL.
    // The last expression is the return value; no `return` keyword needed.
    spark.read.jdbc(
      "jdbc:mysql://localhost:3306/weather?serverTimezone=UTC&useUnicode=true&characterEncoding=UTF-8",
      "(select wind_speed from weather) T",
      properties
    )
  }

2.存入数据库

  /**
   * Persists every (speed, count) pair of the RDD into the `wind` table.
   *
   * One JDBC connection is opened per partition and inserts are batched,
   * so executors do not open a connection per record.
   *
   * @param rdd pairs of wind speed (string form) and its occurrence count
   * @return true once all partitions have been written
   */
  def updateForDb(rdd: RDD[(String, Int)]): Boolean = {
    rdd.foreachPartition { it =>
      val url = "jdbc:mysql://localhost:3306/weather?serverTimezone=UTC&useUnicode=true&characterEncoding=UTF-8"
      // NOTE(review): credentials are hard-coded; consider moving them to configuration.
      val conn = DriverManager.getConnection(url, "root", "123456")
      try {
        // FIX: prepare the statement inside try/finally — previously a failure in
        // prepareStatement or the setters leaked the connection and the statement.
        val pstat = conn.prepareStatement("INSERT INTO wind(`speed`,`count`) VALUES(?,?)")
        try {
          for ((speed, count) <- it) {
            pstat.setString(1, speed)
            pstat.setInt(2, count)
            pstat.addBatch()
          }
          pstat.executeBatch()
        } finally {
          pstat.close()
        }
      } finally {
        // Always release the connection, even when preparing or executing fails.
        conn.close()
      }
    }
    true
  }

3.处理

/**
 * Entry point: word-count style aggregation of wind speeds read from MySQL,
 * with the (speed, count) results written back to the database.
 */
object Count {
  def main(args: Array[String]): Unit = {
    // Reuse one accessor for both the read and the write.
    val db = new GetDb()
    // Read the wind_speed column from the database.
    val windDataFrame = db.getDataForDb()
    // Each Row holds the single wind_speed column, so mkString yields its value.
    val windRdd = windDataFrame.rdd.map(_.mkString(","))
    // Count occurrences of each distinct speed value.
    val result: RDD[(String, Int)] = windRdd.map((_, 1)).reduceByKey(_ + _)
    // Sort by count, descending.
    val finalRes: RDD[(String, Int)] = result.sortBy(_._2, ascending = false)
    // Write the aggregated counts back to the database.
    db.updateForDb(finalRes)
  }
} // FIX: the object was missing its closing brace in the original snippet

4.pom一览

<dependencies>
    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.12</artifactId>
        <version>2.4.4</version>
    </dependency>

    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.12</artifactId>
        <version>2.4.4</version>
        <!--        <scope>provided</scope>-->
    </dependency>
    <!-- https://mvnrepository.com/artifact/mysql/mysql-connector-java -->
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>8.0.21</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-streaming -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming_2.12</artifactId>
        <version>2.4.4</version>
<!--        <scope>provided</scope>-->
    </dependency>

</dependencies>
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值