Spark---解析json格式的日志文件

解析json格式的日志文件

数据准备

Teacher

public class Teacher {
    private Integer id;
    private String name;
    private String tel;
    private String email;
    private Department dept;
    private ArrayList<Student> stus;

    public Teacher() {
    }

    public Teacher(Integer id, String name, String tel, String email, Department dept, ArrayList<Student> stus) {
        this.id = id;
        this.name = name;
        this.tel = tel;
        this.email = email;
        this.dept = dept;
        this.stus = stus;
    }

    /**
     * 获取
     * @return id
     */
    public Integer getId() {
        return id;
    }

    /**
     * 设置
     * @param id
     */
    public void setId(Integer id) {
        this.id = id;
    }

    /**
     * 获取
     * @return name
     */
    public String getName() {
        return name;
    }

    /**
     * 设置
     * @param name
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * 获取
     * @return tel
     */
    public String getTel() {
        return tel;
    }

    /**
     * 设置
     * @param tel
     */
    public void setTel(String tel) {
        this.tel = tel;
    }

    /**
     * 获取
     * @return email
     */
    public String getEmail() {
        return email;
    }

    /**
     * 设置
     * @param email
     */
    public void setEmail(String email) {
        this.email = email;
    }

    /**
     * 获取
     * @return dept
     */
    public Department getDept() {
        return dept;
    }

    /**
     * 设置
     * @param dept
     */
    public void setDept(Department dept) {
        this.dept = dept;
    }

    /**
     * 获取
     * @return stus
     */
    public ArrayList<Student> getStus() {
        return stus;
    }

    /**
     * 设置
     * @param stus
     */
    public void setStus(ArrayList<Student> stus) {
        this.stus = stus;
    }

    public String toString() {
        return "Teacher{id = " + id + ", name = " + name + ", tel = " + tel + ", email = " + email + ", dept = " + dept + ", stus = " + stus + "}";
    }
}

Student

public class Student {
    private Integer id;
    private String name;
    private String grade;
    private School school;

    public Student() {
    }

    public Student(Integer id, String name, String grade, School school) {
        this.id = id;
        this.name = name;
        this.grade = grade;
        this.school = school;
    }

    /**
     * 获取
     * @return id
     */
    public Integer getId() {
        return id;
    }

    /**
     * 设置
     * @param id
     */
    public void setId(Integer id) {
        this.id = id;
    }

    /**
     * 获取
     * @return name
     */
    public String getName() {
        return name;
    }

    /**
     * 设置
     * @param name
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * 获取
     * @return grade
     */
    public String getGrade() {
        return grade;
    }

    /**
     * 设置
     * @param grade
     */
    public void setGrade(String grade) {
        this.grade = grade;
    }

    /**
     * 获取
     * @return school
     */
    public School getSchool() {
        return school;
    }

    /**
     * 设置
     * @param school
     */
    public void setSchool(School school) {
        this.school = school;
    }

    public String toString() {
        return "Student{id = " + id + ", name = " + name + ", grade = " + grade + ", school = " + school + "}";
    }
  }

Department

public class Department {
    private String name;
    private String describe;

    public Department() {
    }

    public Department(String name, String describe) {
        this.name = name;
        this.describe = describe;
    }

    /**
     * 获取
     * @return name
     */
    public String getName() {
        return name;
    }

    /**
     * 设置
     * @param name
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * 获取
     * @return describe
     */
    public String getDescribe() {
        return describe;
    }

    /**
     * 设置
     * @param describe
     */
    public void setDescribe(String describe) {
        this.describe = describe;
    }

    public String toString() {
        return "Department{name = " + name + ", describe = " + describe + "}";
    }
}

School

public class School {
    private String name;
    private String leader;
    private String address;

    public School() {
    }

    public School(String name, String leader, String address) {
        this.name = name;
        this.leader = leader;
        this.address = address;
    }

    /**
     * 获取
     * @return name
     */
    public String getName() {
        return name;
    }

    /**
     * 设置
     * @param name
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * 获取
     * @return leader
     */
    public String getLeader() {
        return leader;
    }

    /**
     * 设置
     * @param leader
     */
    public void setLeader(String leader) {
        this.leader = leader;
    }

    /**
     * 获取
     * @return address
     */
    public String getAddress() {
        return address;
    }

    /**
     * 设置
     * @param address
     */
    public void setAddress(String address) {
        this.address = address;
    }

    public String toString() {
        return "School{name = " + name + ", leader = " + leader + ", address = " + address + "}";
    }
}

实例化对象

 public static void main(String[] args) {
        Teacher teacher = new Teacher();
        School school1 = new School();
        school1.setAddress("南京");
        school1.setLeader("某某某");
        school1.setName("南京大学");
        School school2 = new School();
        school2.setAddress("南京");
        school2.setLeader("某某某");
        school2.setName("南京大学");
        School school3 = new School();
        school3.setAddress("南京");
        school3.setLeader("某某某");
        school3.setName("南京大学");
        Department department = new Department();
        department.setName("学术部");
        department.setDescribe("主要负责学术研究");
        Student stu1 = new Student();
        stu1.setGrade("大一");
        stu1.setName("zs");
        stu1.setId(1212);
        stu1.setSchool(school1);
        Student stu2 = new Student();
        stu2.setGrade("大二");
        stu2.setName("ww");
        stu2.setId(4532);
        stu2.setSchool(school2);
        Student stu3 = new Student();
        stu3.setGrade("大三");
        stu3.setName("zq");
        stu3.setId(9086);
        stu3.setSchool(school3);
        ArrayList<Student> students = new ArrayList<>();
        students.add(stu1);
        students.add(stu2);
        students.add(stu3);

        teacher.setDept(department);
        teacher.setEmail("gree@edu.cn");
        teacher.setId(79);
        teacher.setName("gree");
        teacher.setStus(students);
        teacher.setTel("1111111111");

        String jsonString = JSON.toJSONString(teacher);
        System.out.println(jsonString);
    }

数据结构

在这里插入图片描述
将数据保存到teacher.log文件中,并设置log_id来表示条数
在这里插入图片描述

解析数据

object JsonTeacher {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName("retentionDemo").setMaster("local[*]")
    val spark: SparkSession = SparkSession.builder().config(conf).getOrCreate()
    val sc: SparkContext = spark.sparkContext

    import org.apache.spark.sql.functions._
    import spark.implicits._

    val optinRDD: RDD[String] = sc.textFile("in/teacher.log")
    optinRDD.foreach(println)
    val frame: RDD[(String, String)] = optinRDD.map(
      x => {
        //返回第一个,所在的位置
        val i: Int = x.indexOf(",")//1
        //开始截取
        //(0,i)--->(0,1)
        //(i+1) 2 从下标元素开始到末尾
        val tuple: (String, String) = (x.substring(0, i), x.substring(i + 1))
        tuple
      }
    )
    //将 RDD[String]转换为DataFrame
    val frame1: DataFrame = frame.toDF("log_id", "value")
    //将json类型数据按照需求进行切分
    val frame2: DataFrame = frame1.select(
      $"log_id",
      get_json_object($"value","$.dept").as("dept"),
      get_json_object($"value","$.email").as("teacher_email"),
      get_json_object($"value","$.id").as("teacher_id"),
      get_json_object($"value","$.name").as("teacher_name"),
      get_json_object($"value","$.tel").as("teacher_tel"),
      get_json_object($"value","$.stus").as("stus"),
    )
    frame2.printSchema()
    frame2.show(false)

    //将dept进行拆分
    val frame3: DataFrame = frame2.select(
      $"log_id", $"teacher_name", $"teacher_id", $"teacher_email", $"teacher_tel", $"stus",
      get_json_object($"dept", "$.describe").as("dept_describe"),
      get_json_object($"dept", "$.name").as("dept_name")
    )
    frame3.printSchema()
    frame3.show(false)

    //将stus拆分
    val frame4: DataFrame = frame3.select(
      $"log_id", $"teacher_name", $"teacher_id", $"teacher_email", $"teacher_tel", $"dept_describe", $"dept_name"
      , from_json(
        $"stus"
        , ArrayType(
          StructType(
            StructField("grade", StringType) :: StructField("id", StringType) :: StructField("name", StringType) :: StructField("school", StringType) :: Nil
          )
        )
      ).as("stus")
    )
    frame4.printSchema()
    frame4.show(false)

    //使用炸裂explode函数将stus炸裂
    val frame5: DataFrame = frame4.withColumn("stus", explode($"stus"))
    frame5.printSchema()
    frame5.show(false)

    //将stus更名
    val frame6: DataFrame = frame5
      .withColumn("grade", $"stus.grade").as("grade")
      .withColumn("id", $"stus.id").as("id")
      .withColumn("name", $"stus.name").as("name")
      .withColumn("school", $"stus.school").as("school")
      .drop("stus")
    frame6.printSchema()
    frame6.show(false)

    //将school拆分
    val frame7: DataFrame = frame6.select(
      $"log_id",$"teacher_name",$"teacher_id",$"teacher_email",$"teacher_tel",
      $"dept_describe",$"dept_name",$"grade",$"id",$"name",
      get_json_object($"school", "$.address").as("address"),
      get_json_object($"school", "$.leader").as("leader"),
      get_json_object($"school", "$.name").as("school_name")
    )
    frame7.show()
    frame7.printSchema()

    //写入数据库中,表名为table_json
    JdbcUtils.dataFrameToMysql(frame7,table_json,1)
    println("操作结束")
  }
}

在这里插入图片描述

连接数据库工具类

object JdbcUtils {
  val url = "jdbc:mysql://192.168.95.130:3306/logdemo?createDatabaseIfNotExist=true"
  val dirver = "com.mysql.cj.jdbc.Driver"
  val user = "root"
  val password = "root"

  //表名
  val table_json="json_log"

  val properties = new Properties()
  properties.setProperty("user", JdbcUtils.user)
  properties.setProperty("password", JdbcUtils.password)
  properties.setProperty("driver", JdbcUtils.dirver)

  def dataFrameToMysql(df: DataFrame, table: String, op: Int = 1): Unit = {
    if (op == 0) {
      df.write.mode(SaveMode.Append).jdbc(JdbcUtils.url, table, properties)
    } else {
      df.write.mode(SaveMode.Overwrite).jdbc(JdbcUtils.url, table, properties)
    }
  }

  def getDataFrameByTableName(spark:SparkSession,table:String):DataFrame={
    val frame: DataFrame = spark.read.jdbc(JdbcUtils.url, table,JdbcUtils.properties)
    frame
  }
}

在这里插入图片描述
在这里插入图片描述

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值