疫情爬虫脚本、superset报表使用

使用 Selenium 爬取官方公布的疫情信息,写入 MySQL 后用 Superset 展示。

工程文件下载: 链接:https://pan.baidu.com/s/1dGx5RMVs3akD2AlpC1aXWQ?pwd=avum
提取码:avum


1.配置类
package source
import org.openqa.selenium.By
object provinceSource {

  /** Bundle of regex rules for one data source, grouped by statistic.
    *
    * Every rule is a `(pattern, Int)` pair. All rules declared in this object
    * use `1` for the Int; presumably it is the capture-group index to read --
    * TODO confirm against the code that consumes these rules.
    */
  trait Regx(
      var In_confirm: List[(String, Int)],
      var Out_confirm: List[(String, Int)],
      var In_suspectedRegxList: List[(String, Int)],
      var Out_suspectedRegxList: List[(String, Int)],
      var now_confirm: List[(String, Int)],
      var now_suspected: List[(String, Int)],
      var all_confirm: List[(String, Int)],
      var all_suspected: List[(String, Int)],
      var today_heal: List[(String, Int)],
      var today_relieve: List[(String, Int)],
      var all_heal: List[(String, Int)],
      var all_relieve: List[(String, Int)]
  )

  /** Pair of region identifiers attached to a data source. */
  trait Code(var area_code: String, var iso_code: String)

  // --- National Health Commission ("guoJia") source ------------------------

  /** Index page that lists the published bulletins. */
  val guoJia: String = "http://www.nhc.gov.cn/xcs/xxgzbd/gzbd_index.shtml"

  /** Locator of the bulletin list on the index page. */
  val guoJiaXpath: By = By.xpath("/html/body/div[3]/div[2]/ul")

  /** XPath of the container element on a bulletin page. */
  val guoJiaXpath2: String = "/html/body/div[3]/div[2]/div[3]"

  /** CSS selector applied inside that container to pick the paragraphs. */
  val guoJiaCssSelecter: String = "div p"

  /** Region identifiers used for the national source. */
  class guoJiaCode extends Code("320000", "CN-32")

  /** Regex rules for the national bulletin; cumulative statistics have no rules. */
  class guoJiaRegx extends Regx(
    In_confirm = List("新增本土确诊病例(\\d+)例" -> 1),
    Out_confirm = List("新增境外.*确诊病例(\\d+)例" -> 1),
    In_suspectedRegxList = List("新增本土无症状感染者(\\d+)例" -> 1),
    Out_suspectedRegxList = List("新增境外输入无症状感染者(\\d+)例" -> 1),
    now_confirm = List("隔离治疗.*确诊病例(\\d+)例" -> 1),
    now_suspected = List("隔离医学管理的无症状感染者(\\d+)例" -> 1),
    all_confirm = Nil,
    all_suspected = Nil,
    today_heal = List("新增出院病例(\\d+)例" -> 1),
    today_relieve = List("解除隔离医学管理的无症状感染者(\\d+)例" -> 1),
    all_heal = Nil,
    all_relieve = Nil
  )
}

2.主类
import org.openqa.selenium.{By, WebDriver, WebElement}
import org.openqa.selenium.chrome.ChromeDriver
import source.provinceSource.*
import java.sql.Statement
import java.text.SimpleDateFormat
import java.util
import java.util.logging.Logger
import java.util.regex.Matcher
import scala.collection.mutable
import collection.JavaConverters.*
import java.util.concurrent.ExecutorService
import java.util.concurrent.Executors

object pulgs {

  /** Entry point: configures the Chrome driver and runs the national scrape
    * on a worker thread.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // Tell Selenium where the local chromedriver binary lives.
    System.setProperty("webdriver.chrome.driver", "C:\\Users\\y\\Desktop\\superSet\\chromedriver\\chromedriver.exe")

    val fixedThreadPool = Executors.newFixedThreadPool(5)
    try {
      fixedThreadPool.execute(() => {
        provinceCreeper("国家卫健委", guoJia, guoJiaXpath, guoJiaXpath2, guoJiaCssSelecter, guoJiaCode(), guoJiaRegx())
      })
    } finally {
      // shutdown() lets already-submitted tasks finish but releases the worker
      // threads afterwards; without it the non-daemon workers keep the JVM alive.
      fixedThreadPool.shutdown()
    }
  }

  /** Opens the index page, follows one bulletin link, collects the text of the
    * bulletin paragraphs and hands them to [[RegexMatches.matchLineGuojia]].
    *
    * @param provinceName label used in log output; "安徽" selects the second
    *                     list entry instead of the first
    * @param indexUrl     URL of the page listing the bulletins
    * @param xpath1       locator of the bulletin list on the index page
    * @param xpath2       XPath of the content container on the bulletin page
    * @param cssSelecter  CSS selector for the paragraphs inside that container
    * @param code         region identifiers (currently unused here)
    * @param regx         regex rules (currently unused here)
    * @throws IllegalStateException if the index page has no list entry at the
    *                               expected position
    */
  def provinceCreeper(provinceName: String, indexUrl: String, xpath1: By, xpath2: String, cssSelecter: String, code: Code, regx: Regx): Unit = {
    // chromedriver download: http://chromedriver.storage.googleapis.com/index.html
    val driver = new ChromeDriver
    try {
      driver.get(indexUrl)

      println("\n")
      println(s"----------- ${provinceName}")
      println(s"[$provinceName] index " + indexUrl)

      // Use the caller-supplied locator rather than a hard-coded copy of it.
      val listRoot = driver.findElement(xpath1)
      val lis = listRoot.findElements(By.tagName("li")).asScala
      val li = if (provinceName.equals("安徽")) 1 else 0
      val entry = lis.lift(li).getOrElse(
        throw new IllegalStateException(s"[$provinceName] no bulletin entry at position $li on $indexUrl")
      )
      val currUrl: String = entry.findElement(By.tagName("a")).getAttribute("href")

      println(s"[$provinceName] second-index " + currUrl)

      driver.get(currUrl)
      val container = driver.findElement(By.xpath(xpath2))
      val paragraphs = container.findElements(By.cssSelector(cssSelecter)).asScala
      val lineArr: mutable.Buffer[String] = paragraphs.map(p => {
        // Read the text once: each getText call goes back to the browser.
        val text = p.getText
        println(s"[$provinceName] " + text)
        text
      })

      RegexMatches.matchLineGuojia(lineArr)
      println("-----------End\n")
    } finally {
      // Always end the browser session, even when scraping fails.
      driver.quit()
    }
  }
}

3.正则处理类
import source.provinceSource.{Code, Regx}
import java.text.SimpleDateFormat
import java.util.regex.{Matcher, Pattern}
import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.io.{BufferedSource, Source}

object RegexMatches {

  /** Loads the province table from `lib/map.txt`.
    *
    * Each usable line holds two space-separated tokens; the first becomes the
    * key and the second the value. Lines with fewer than two tokens (for
    * example blank lines) are skipped. The file is read with the platform
    * default charset.
    *
    * @return mutable map from the first token to the second token of each line
    */
  def getMap(): mutable.HashMap[String, String] = {
    val source: BufferedSource = Source.fromFile("lib/map.txt")
    try {
      val buffer = new mutable.HashMap[String, String]()
      for (line <- source.getLines()) {
        val strings = line.split(" ")
        if (strings.length >= 2) buffer += strings(0) -> strings(1)
      }
      buffer
    } finally {
      // Release the file handle even if reading fails.
      source.close()
    }
  }

  /** Extracts per-province figures from the bulletin paragraphs and stores one
    * row per province through `mysqlDb.execute`.
    *
    * For every entry of [[getMap]] the paragraphs are scanned for
    * `<province><digits>例`; which field receives the number depends on the
    * kind of paragraph the match was found in. Fields without a rule keep
    * their defaults (`0` or "未知").
    *
    * @param lineArr text of the bulletin paragraphs
    * @return always `null`; the declared return type is kept so existing
    *         callers keep compiling
    */
  def matchLineGuojia(lineArr: mutable.Buffer[String]): sqlbean = {
    val getCurDate = new SimpleDateFormat("yyyy-MM-dd").format(System.currentTimeMillis())
    val map = getMap()

    map.foreach(entry => {
      val provinceName = entry._1
      val area_code = entry._2
      val iso_code = s"CN-${area_code}"

      var in_confirm = 0
      var in_suspected = 0
      var today_heal = "未知"
      var today_relieve = "未知"
      // No extraction rule exists for the fields below, so they stay at their defaults.
      val out_confirm = 0
      val out_suspected = 0
      val today_infestor = 0
      val now_confirm = "未知"
      val now_suspected = "未知"
      val all_confirm = "未知"
      val all_suspected = "未知"
      val all_heal = "未知"
      val all_relieve = "未知"

      // Compile once per province; quote the name so it is matched literally.
      val countPattern = Pattern.compile(s"${Pattern.quote(provinceName)}(\\d+)例")

      // Digits following the province name in `line`, if present.
      def countIn(line: String): Option[String] = {
        val matcher = countPattern.matcher(line)
        if (matcher.find()) Some(matcher.group(1)) else None
      }

      lineArr.foreach(line => {
        // Newly confirmed cases per province
        if (line.matches("^\\d+月\\d+日(.*?)31个省.*")) {
          countIn(line).foreach(count => in_confirm = count.toInt)
        }

        // New asymptomatic infections per province
        if (line.matches(".*新增无症状感染者.*")) {
          countIn(line).foreach(count => in_suspected = count.toInt)
        }

        // Newly recovered
        if (line.matches(".*新增治愈.*")) {
          countIn(line).foreach(count => today_heal = count)
        }

        // Released from medical observation
        if (line.matches(".*解除医学观察的无症状感染者.*")) {
          countIn(line).foreach(count => today_relieve = count)
        }
      })

      val bean: sqlbean = sqlbean(ds = getCurDate, province_name = provinceName, area_code = area_code, iso_code = iso_code,
        in_confirm = in_confirm, in_suspected = in_suspected, out_confirm = out_confirm, out_suspected = out_suspected,
        today_infestor = today_infestor,
        now_confirm = now_confirm, now_suspected = now_suspected,
        all_confirm = all_confirm, all_suspected = all_suspected,
        today_heal = today_heal, today_relieve = today_relieve,
        all_heal = all_heal, all_relieve = all_relieve)
      println(mysqlDb.execute(bean))
      println(s"--${provinceName}--" + bean)
    })
    // Callers ignore the result; null is kept for backward compatibility.
    null
  }
}

4.bean类
/** One row of case statistics for a province on a given day.
  *
  * The five new-case counters are integers; the remaining statistics are kept
  * as strings. `toString` lists only the statistics, not the date or the
  * region identifiers.
  */
case class sqlbean(
    var ds: String,
    var province_name: String,
    var area_code: String,
    var iso_code: String,
    var in_confirm: Int,
    var in_suspected: Int,
    var out_confirm: Int,
    var out_suspected: Int,
    var today_infestor: Int,
    var now_confirm: String,
    var now_suspected: String,
    var all_confirm: String,
    var all_suspected: String,
    var today_heal: String,
    var today_relieve: String,
    var all_heal: String,
    var all_relieve: String
):
  /** Renders every statistic as `label:value` followed by two spaces. */
  override def toString: String =
    val labelled: Seq[(String, String)] = Seq(
      "境内确诊" -> in_confirm.toString,
      "境内无症状" -> in_suspected.toString,
      "境外确诊" -> out_confirm.toString,
      "境外无症状" -> out_suspected.toString,
      "当日感染者" -> today_infestor.toString,
      "现存确诊" -> now_confirm,
      "现存无症状" -> now_suspected,
      "累计确诊" -> all_confirm,
      "累计无症状" -> all_suspected,
      "当日新增治愈" -> today_heal,
      "当日解除观察" -> today_relieve,
      "累计治愈出院" -> all_heal,
      "累计解除观察" -> all_relieve
    )
    labelled.map { case (label, value) => s"$label:$value  " }.mkString

4.1.sql工具类
import java.sql.Statement

object mysqlDb {

  import java.sql.Connection
  import java.sql.DriverManager
  import java.sql.PreparedStatement

  /** Inserts the row into `db1.cov19Case`, or updates the statistics of the
    * existing row when the insert hits a duplicate key.
    *
    * Values are bound through a `PreparedStatement` rather than concatenated
    * into the SQL text. The connection and statement are closed even when
    * execution fails.
    *
    * @param sqlbean the row to store
    * @return a log line containing the parameterized SQL, the bound values and
    *         a success/failure marker
    * @throws java.sql.SQLException   if connecting or executing fails
    * @throws ClassNotFoundException if the JDBC driver class is not on the classpath
    */
  def execute(sqlbean: sqlbean): String = {
    // The four JDBC connection properties.
    val driver: String = "com.mysql.jdbc.Driver"
    val url: String = "jdbc:mysql://localhost:3306/db1?characterEncoding=UTF-8"
    val user: String = "root"
    val password: String = "xxx"
    // Load the driver class.
    Class.forName(driver)

    // Bind order: key columns, counters, text statistics for the INSERT part,
    // then counters and text statistics again for the UPDATE part.
    val keys: Seq[String] = Seq(sqlbean.ds, sqlbean.province_name, sqlbean.area_code, sqlbean.iso_code)
    val counts: Seq[Int] = Seq(sqlbean.in_confirm, sqlbean.in_suspected, sqlbean.out_confirm,
      sqlbean.out_suspected, sqlbean.today_infestor)
    val texts: Seq[String] = Seq(sqlbean.now_confirm, sqlbean.now_suspected, sqlbean.all_confirm,
      sqlbean.all_suspected, sqlbean.today_heal, sqlbean.today_relieve, sqlbean.all_heal, sqlbean.all_relieve)

    // The UPDATE clause sets out_suspected; the earlier version assigned
    // out_confirm twice and never refreshed out_suspected.
    val sql: String =
      "INSERT INTO db1.cov19Case VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) " +
        "ON DUPLICATE KEY UPDATE in_confirm = ?, in_suspected = ?, out_confirm = ?, out_suspected = ?, today_infestor = ?, " +
        "now_confirm = ?, now_suspected = ?, all_confirm = ?, all_suspected = ?, " +
        "today_heal = ?, today_relieve = ?, all_heal = ?, all_relieve = ?"

    // Open the connection.
    val conn: Connection = DriverManager.getConnection(url, user, password)
    val count: Int =
      try {
        val statement: PreparedStatement = conn.prepareStatement(sql)
        try {
          // JDBC parameter positions are 1-based; the counter advances per bound value.
          var position = 0
          def bindText(value: String): Unit = { position += 1; statement.setString(position, value) }
          def bindCount(value: Int): Unit = { position += 1; statement.setInt(position, value) }

          keys.foreach(bindText)
          counts.foreach(bindCount)
          texts.foreach(bindText)
          counts.foreach(bindCount)
          texts.foreach(bindText)

          statement.executeUpdate()
        } finally {
          statement.close()
        }
      } finally {
        conn.close()
      }

    // NOTE(review): per the MySQL documentation an upsert that leaves an existing
    // row unchanged reports 0 affected rows, so "添加失败" can also mean "nothing
    // to change" -- confirm whether that case should be reported differently.
    val msg: String = if (count > 0) "(添加成功)" else "(添加失败)"
    val bound: String = (keys ++ counts.map(_.toString) ++ texts).mkString(", ")
    s"[${sqlbean.province_name}-Sql] ${sql} [${bound}];  ${msg}!"
  }
}

在这里插入图片描述

superset报表只用了国家地图和直方图

在这里插入图片描述

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值