2021-06-25

import java.text.SimpleDateFormat
import java.util.Date
import java.util.concurrent.{Callable, Executors, FutureTask}

import com.alibaba.fastjson.{JSON, JSONArray, JSONObject}
import com.fengtu.sparktest.aoi.GetGjAoiInterface2
import org.apache.commons.lang.StringUtils
import org.apache.log4j.Logger
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
import org.apache.spark.storage.StorageLevel

import scala.collection.mutable.ArrayBuffer

object AoiAccuracyInterfaceTest7 {
  @transient lazy val logger: Logger = Logger.getLogger(AoiAccuracyInterfaceTest7.getClass)
  val appName: String = this.getClass.getSimpleName.replace("$", "")
  def main(args: Array[String]): Unit = {
    //val inc_day ="20201107"
    //val str = GetGjAoiInterface.getContent(","{\"inc_day\":\"20201107\",\"un\":\"40398702\",\"bn\":\"571FE\",\"tp\":\"20201107\",\"tracks\":[{\"ac\":\"12\",\"x\":\"120.488271\",\"y\":\"30.256971\",\"tm\":\"1604716191\",\"t_time\":\"2020/11/7 10:29\"},{\"ac\":\"13\",\"x\":\"120.488068\",\"y\":\"30.256066\",\"tm\":\"1604716311\",\"t_time\":\"2020/11/7 10:31\"},{\"ac\":\"30\",\"x\":\"120.493522\",\"y\":\"30.260036\",\"tm\":\"1604737005\",\"t_time\":\"2020/11/7 16:16\"},{\"ac\":\"7\",\"x\":\"120.493521\",\"y\":\"30.259342\",\"tm\":\"1604737041\",\"t_time\":\"2020/11/7 16:17\"},{\"ac\":\"12\",\"x\":\"120.492902\",\"y\":\"30.2581\",\"tm\":\"1604737084\",\"t_time\":\"2020/11/7 16:18\"}]}")
    val inc_day = args(0)
    //val inc_day = "20201129"
    start(inc_day)
  }

  def start(inc_day: String): Unit = {
    val spark = SparkSession
      .builder()
      .appName("SparkDecode")
      .master("yarn")
      .enableHiveSupport()
      .config("hive.exec.dynamic.partition",true)
      .config("hive.exec.dynamic.partition.mode","nonstrict")
      .getOrCreate()
    spark.sparkContext.setLogLevel("ERROR")
  //  val array = Array("20201213","20201214")


    val service = Executors.newFixedThreadPool(6)
    val list = new java.util.ArrayList[Callable[Void]]()

    spark.read.format("csv").option("header", "true").load("/user/01401062/upload/gis/project/aoi/aoiAccuracy/areaAddrTest.csv").rdd.map(
      x => {
        x.getString(0)
      }).collect().foreach(x => {
      println(x)
      list.add(new Callable[Void] {
        override def call(): Void = {
              startSta(spark,inc_day,x)
          null
        }})
    })


    service.invokeAll(list)
    service.shutdown()


    logger.error("统计完毕")
    spark.stop()

  }

  //private val gjUrl ="http://"
  private val gjUrl ="http://
  case class res(
                  un:String,
                  bn:String,
                  inc_day_gj:String,
                  fix:String,
                  fixX:String,
                  fixY:String,
                  aoiid:String,
                  aoiname:String,
                  fatype:String,
                  faname:String,
                  interval:String,
                  dis:String,
                  speed:String,
                  tm:String,
                  t_time:String,
                  tp:String,
                  aoicode:String,
                  response:String,
                  input:String
                )

  case class re2(
                  un:String,
                  tp:String,
                  zx:String,
                  zy:String,
                  ac:String,
                  tm:String,
                  t_time:String,
                  inc_day:String,
                  id:String,
                  bt:String,
                  sc:String
                )

  case class resAll(
                     un:String,
                     tp:String,
                     zx:String,
                     zy:String,
                     ac:String,
                     tm:String,
                     bn:String,
                     t_time:String,
                     inc_day_gj:String,
                     fix:String,
                     fixX:String,
                     fixY:String,
                     aoiid:String,
                     aoiname:String,
                     fatype:String,
                     faname:String,
                     interval:String,
                     dis:String,
                     speed:String,
                     aoicode:String,
                     response:String,
                     input:String
                   )


  def startSta(spark:SparkSession,inc_day:String,citycode:String): Unit ={

    val dataStartTime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").getCalendar.getTimeInMillis
    val delSql =
      s"""
         |alter table dm_gis.aoi_accuracy_gj_detail_new drop if exists partition (inc_day='${inc_day}',citycode='${citycode}')
           """.stripMargin
    spark.sql(delSql)
    logger.error("删除分区:" + delSql)

    val trajSql =
      s"""
         |select  un,bn,tp,zx,zy,ac,tm,from_unixtime(cast(tm as int), 'yyyy/MM/dd HH:mm:ss') as t_time,inc_day,id,bt,sc,regexp_replace(substring(bn,0,4),'[a-zA-Z]*','') citycode
         |  from
         |dm_gis.esg_gis_loc_trajectory
         |where
         | inc_day ='${inc_day}'
         |and
         |  regexp_replace(substring(bn,0,4),'[a-zA-Z]*','') = '${citycode}'
         |and
         |  ak =1
             """.stripMargin
    //logger.error("轨迹sql为:" + trajSql)
    logger.error("轨迹sql为" + trajSql)
    val trajCalPre = spark.sql(trajSql).persist(StorageLevel.MEMORY_AND_DISK_SER)
    logger.error("输入数据量为" + trajCalPre.count())
    trajCalPre.take(2).foreach(println(_))

    val trajCalPre2 = trajCalPre.rdd.map(x => {
      val jo = new JSONObject()
      val names =  Array("un","bn","tp","zx","zy","ac","tm","t_time","inc_day","id","bt","sc","citycode")
      for(i <- names.indices) jo.put(names(i),x.getString(i))
      jo
    }).persist(StorageLevel.MEMORY_AND_DISK_SER)
    trajCalPre2.take(4).foreach(println(_))


    logger.error("输入数据量为" + trajCalPre2.count())
    trajCalPre2.take(2).foreach(println(_))

    val trajCal =trajCalPre2.map(x =>{
      val un = x.getString("un")
      val bn = x.getString("bn")
      val inc_day = x.getString("inc_day")
      val citycode = x.getString("citycode")

      ((un,bn,inc_day,citycode),x)
    }).groupByKey().map( x=> {
      val list = x._2.toList.sortBy(_.getString("tm"))
      val joArray = new JSONArray
      val un = x._1._1
      val bn = x._1._2
      val inc_day =x._1._3
      val citycode = x._1._4

      for (i <- 0 until(list.length)) {
        val jo = new JSONObject()
        val zx = list(i).getDouble("zx")
        val zy = list(i).getDouble("zy")
        val ac=  list(i).getString("ac")
        val tm = list(i).getBigInteger("tm")
        val tp = list(i).getBigInteger("tp")
        val t_time = list(i).getString("t_time")
        jo.put("x",zx)
        jo.put("y",zy)
        jo.put("ac",ac)
        jo.put("tm",tm)
        jo.put("t_time",t_time)
        jo.put("tp",tp)
        joArray.add(jo)
      }

      val json = new JSONObject()

      json.put("un",un)
      json.put("bn",bn)
      json.put("inc_day",inc_day)
      json.put("citycode",citycode)
      json.put("tracks",joArray)

      json
    }).persist(StorageLevel.MEMORY_AND_DISK_SER)
    logger.error("处理后的轨迹的输入量为" + trajCal.count())
    val gjUrlBroadcast= spark.sparkContext.broadcast(gjUrl)
    import spark.implicits._

    val callStartTime =  new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").getCalendar.getTimeInMillis
    val dataReadyTime =  ((callStartTime - dataStartTime) /1000) / 60.0

    logger.error(citycode + "城市数据准备耗时为:" + dataReadyTime.formatted("%.2f"))
    logger.error(citycode + "开始调用轨迹端口时间为:" +  new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date))


    val traj_res = trajCal.repartition(20).flatMap( x => {
      val url = gjUrlBroadcast.value
      val json = GetGjAoiInterface2.getContent(url,x)
      val buffer = new ArrayBuffer[res]
      if(StringUtils.isNotEmpty(json) && json.contains("tracks") && json.contains("inc_day")){
          val jo = JSON.parseObject(json)
          val un = jo.getString("un")
          val bn = jo.getString("bn")
          val inc_day_gj = jo.getString("inc_day")
          val array = jo.getJSONArray("tracks")
          for(i <- 0 until(array.size())){
            val jo1 = array.getJSONObject(i)
            val fix = jo1.getString("fix")
            val fixX = jo1.getString("fixX")
            val fixY = jo1.getString("fixY")
            val aoiid = jo1.getString("aoiid")
            val aoiname = jo1.getString("aoiname")
            val fatype = jo1.getString("fatype")
            val faname = jo1.getString("faname")
            val interval = jo1.getString("interval")
            val dis = jo1.getString("dis")
            val speed = jo1.getString("speed")
            val aoicode = jo1.getString("aoicode")

            val tm = jo1.getString("tm")
            val t_time = jo1.getString("t_time")
            val tp = jo1.getString("tp")
            buffer.append(res(un,bn,inc_day_gj,fix,fixX,fixY,aoiid,aoiname,fatype,faname,interval,dis,speed,tm,t_time,tp,aoicode,"Correct",""))
        }
      }else{
        val un = x.getString("un")
        val bn = x.getString("bn")
        val inc_day_gj = x.getString("inc_day")
        if(StringUtils.isEmpty(json)){
          buffer.append(res(un,bn,inc_day_gj,"","","","","","","","","","","","","","","Empty",x.toJSONString))
        }else{
          buffer.append(res(un,bn,inc_day_gj,"","","","","","","","","","","","","","",json,x.toJSONString))
        }
      }
      buffer
    }).distinct().toDF()
    logger.error("traj_res数量为:"+ traj_res.count())



    val trajCalPre3 = trajCalPre2.map(
      x =>{
        val un = x.getString("un")
        val bn = x.getString("bn")
        val inc_day = x.getString("inc_day")
        val t_time = x.getString("t_time")
        val tp = x.getString("tp")
        ((un,bn,inc_day,t_time,tp),x)
      }).distinct()


    val resAllDf = traj_res.rdd.map(x => {
      val jo = new JSONObject()
      val names =  Array("un","bn","inc_day_gj","fix","fixX","fixY","aoiid","aoiname","fatype","faname","interval","dis","speed","tm","t_time","tp","aoicode","response","input")
      for(i <- names.indices) jo.put(names(i),x.getString(i))
      val un = x.getString(0)
      val bn = x.getString(1)
      val inc_day_gj = x.getString(2)
      val tm = x.getString(13)
      val t_time =x.getString(14)
      val tp = x.getString(15)
      ((un,bn,inc_day_gj,t_time,tp),jo)
    }).leftOuterJoin(trajCalPre3).map(x => {
      val leftBody = x._2._1
      val rightBody = x._2._2
      val un =leftBody.getString("un")
      val bn =leftBody.getString("bn")
      val inc_day_gj =leftBody.getString("inc_day_gj")
      val fix =leftBody.getString("fix")
      val fixX =leftBody.getString("fixX")
      val fixY =leftBody.getString("fixY")
      val aoiid =leftBody.getString("aoiid")
      val aoiname =leftBody.getString("aoiname")
      val fatype =leftBody.getString("fatype")
      val faname =leftBody.getString("faname")
      val interval =leftBody.getString("interval")
      val dis =leftBody.getString("dis")
      val speed =leftBody.getString("speed")
      val tm =leftBody.getString("tm")
      val t_time =leftBody.getString("t_time")
      val tp =leftBody.getString("tp")
      val aoicode =leftBody.getString("aoicode")
      val response =leftBody.getString("response")
      val input =leftBody.getString("input")

      val rightBodyNew = rightBody.getOrElse(new JSONObject())
      val zx = rightBodyNew.getString("zx")
      val zy = rightBodyNew.getString("zy")
      val ac = rightBodyNew.getString("ac")

      resAll(un,tp,zx,zy,ac,tm,bn,t_time,inc_day_gj,fix,fixX,fixY,aoiid,aoiname,fatype,faname,interval,dis,speed,aoicode,response,input)

    }).distinct().toDF().persist(StorageLevel.MEMORY_AND_DISK_SER)

    logger.error(citycode +"调用接口返回的数据量为:" + resAllDf.count())

    val callEndTime =  new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").getCalendar.getTimeInMillis
    val timeDiff = ((callEndTime - callStartTime) / 1000 ) / 60.0
    logger.error(citycode + "调用轨迹端口结束时间为:" + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date))
    logger.error(citycode + "城市端口调用时间为:" + timeDiff.formatted("%.2f") )
    trajCalPre.unpersist()
    resAllDf.take(2).foreach(println(_))

    var numPartiton = scala.math.ceil((resAllDf.count() / 500000)).toInt
    if(numPartiton <= 1)
      numPartiton = 5
    logger.error(citycode+ "城市分区数为:" + numPartiton)

    resAllDf.withColumn("inc_day",lit(inc_day)).withColumn("citycode",lit(citycode)).repartition(numPartiton).write.mode(SaveMode.Append).insertInto("dm_gis.aoi_accuracy_gj_detail_new")
    resAllDf.unpersist()


  }



}
使用python中的pymsql完成如下:表结构与数据创建 1. 建立 `users` 表和 `orders` 表。 `users` 表有用户ID、用户名、年龄字段,(id,name,age) `orders` 表有订单ID、订单日期、订单金额,用户id字段。(id,order_date,amount,user_id) 2 两表的id作为主键,`orders` 表用户id为users的外键 3 插入数据 `users` (1, '张三', 18), (2, '李四', 20), (3, '王五', 22), (4, '赵六', 25), (5, '钱七', 28); `orders` (1, '2021-09-01', 500, 1), (2, '2021-09-02', 1000, 2), (3, '2021-09-03', 600, 3), (4, '2021-09-04', 800, 4), (5, '2021-09-05', 1500, 5), (6, '2021-09-06', 1200, 3), (7, '2021-09-07', 2000, 1), (8, '2021-09-08', 300, 2), (9, '2021-09-09', 700, 5), (10, '2021-09-10', 900, 4); 查询语句 1. 查询订单总金额 2. 查询所有用户的平均年龄,并将结果四舍五入保留两位小数。 3. 查询订单总数最多的用户的姓名和订单总数。 4. 查询所有不重复的年龄。 5. 查询订单日期在2021年9月1日至9月4日之间的订单总金额。 6. 查询年龄不大于25岁的用户的订单数量,并按照降序排序。 7. 查询订单总金额排名前3的用户的姓名和订单总金额。 8. 查询订单总金额最大的用户的姓名和订单总金额。 9. 查询订单总金额最小的用户的姓名和订单总金额。 10. 查询所有名字中含有“李”的用户,按照名字升序排序。 11. 查询所有年龄大于20岁的用户,按照年龄降序排序,并只显示前5条记录。 12. 查询每个用户的订单数量和订单总金额,并按照总金额降序排序。
06-03
这里是使用 Python 的 PyMySQL 完成上述任务的代码: ```python import pymysql # 连接数据库 conn = pymysql.connect(host='localhost', port=3306, user='root', password='123456', database='test') # 创建游标 cur = conn.cursor() # 创建 users 表 cur.execute(''' CREATE TABLE users ( id INT PRIMARY KEY, name VARCHAR(20), age INT ) ''') # 创建 orders 表 cur.execute(''' CREATE TABLE orders ( id INT PRIMARY KEY, order_date DATE, amount INT, user_id INT, FOREIGN KEY (user_id) REFERENCES users(id) ) ''') # 插入数据 users_data = [ (1, '张三', 18), (2, '李四', 20), (3, '王五', 22), (4, '赵六', 25), (5, '钱七', 28) ] orders_data = [ (1, '2021-09-01', 500, 1), (2, '2021-09-02', 1000, 2), (3, '2021-09-03', 600, 3), (4, '2021-09-04', 800, 4), (5, '2021-09-05', 1500, 5), (6, '2021-09-06', 1200, 3), (7, '2021-09-07', 2000, 1), (8, '2021-09-08', 300, 2), (9, '2021-09-09', 700, 5), (10, '2021-09-10', 900, 4) ] for user in users_data: cur.execute('INSERT INTO users VALUES (%s, %s, %s)', user) for order in orders_data: cur.execute('INSERT INTO orders VALUES (%s, %s, %s, %s)', order) # 提交事务 conn.commit() # 查询语句 # 1. 查询订单总金额 cur.execute('SELECT SUM(amount) FROM orders') print(cur.fetchone()[0]) # 2. 查询所有用户的平均年龄,并将结果四舍五入保留两位小数。 cur.execute('SELECT ROUND(AVG(age), 2) FROM users') print(cur.fetchone()[0]) # 3. 查询订单总数最多的用户的姓名和订单总数。 cur.execute(''' SELECT users.name, COUNT(*) AS total_orders FROM users JOIN orders ON users.id = orders.user_id GROUP BY users.id ORDER BY total_orders DESC LIMIT 1 ''') print(cur.fetchone()) # 4. 查询所有不重复的年龄。 cur.execute('SELECT DISTINCT age FROM users') print([row[0] for row in cur.fetchall()]) # 5. 查询订单日期在2021年9月1日至9月4日之间的订单总金额。 cur.execute('SELECT SUM(amount) FROM orders WHERE order_date BETWEEN "2021-09-01" AND "2021-09-04"') print(cur.fetchone()[0]) # 6. 查询年龄不大于25岁的用户的订单数量,并按照降序排序。 cur.execute(''' SELECT users.name, COUNT(*) AS total_orders FROM users JOIN orders ON users.id = orders.user_id WHERE age <= 25 GROUP BY users.id ORDER BY total_orders DESC ''') print(cur.fetchall()) # 7. 查询订单总金额排名前3的用户的姓名和订单总金额。 cur.execute(''' SELECT users.name, SUM(amount) AS total_amount FROM users JOIN orders ON users.id = orders.user_id GROUP BY users.id ORDER BY total_amount DESC LIMIT 3 ''') print(cur.fetchall()) # 8. 查询订单总金额最大的用户的姓名和订单总金额。 cur.execute(''' SELECT users.name, SUM(amount) AS total_amount FROM users JOIN orders ON users.id = orders.user_id GROUP BY users.id ORDER BY total_amount DESC LIMIT 1 ''') print(cur.fetchone()) # 9. 查询订单总金额最小的用户的姓名和订单总金额。 cur.execute(''' SELECT users.name, SUM(amount) AS total_amount FROM users JOIN orders ON users.id = orders.user_id GROUP BY users.id ORDER BY total_amount ASC LIMIT 1 ''') print(cur.fetchone()) # 10. 查询所有名字中含有“李”的用户,按照名字升序排序。 cur.execute('SELECT * FROM users WHERE name LIKE "%李%" ORDER BY name ASC') print(cur.fetchall()) # 11. 查询所有年龄大于20岁的用户,按照年龄降序排序,并只显示前5条记录。 cur.execute('SELECT * FROM users WHERE age > 20 ORDER BY age DESC LIMIT 5') print(cur.fetchall()) # 12. 查询每个用户的订单数量和订单总金额,并按照总金额降序排序。 cur.execute(''' SELECT users.name, COUNT(*) AS total_orders, SUM(amount) AS total_amount FROM users JOIN orders ON users.id = orders.user_id GROUP BY users.id ORDER BY total_amount DESC ''') print(cur.fetchall()) # 关闭游标和连接 cur.close() conn.close() ``` 注意:在运行代码之前,需要先安装 PyMySQL 模块,可以使用以下命令进行安装: ``` pip install pymysql ```
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值