解析json格式的日志文件
数据准备
Teacher
public class Teacher {
private Integer id;
private String name;
private String tel;
private String email;
private Department dept;
private ArrayList<Student> stus;
public Teacher() {
}
public Teacher(Integer id, String name, String tel, String email, Department dept, ArrayList<Student> stus) {
this.id = id;
this.name = name;
this.tel = tel;
this.email = email;
this.dept = dept;
this.stus = stus;
}
/**
* 获取
* @return id
*/
public Integer getId() {
return id;
}
/**
* 设置
* @param id
*/
public void setId(Integer id) {
this.id = id;
}
/**
* 获取
* @return name
*/
public String getName() {
return name;
}
/**
* 设置
* @param name
*/
public void setName(String name) {
this.name = name;
}
/**
* 获取
* @return tel
*/
public String getTel() {
return tel;
}
/**
* 设置
* @param tel
*/
public void setTel(String tel) {
this.tel = tel;
}
/**
* 获取
* @return email
*/
public String getEmail() {
return email;
}
/**
* 设置
* @param email
*/
public void setEmail(String email) {
this.email = email;
}
/**
* 获取
* @return dept
*/
public Department getDept() {
return dept;
}
/**
* 设置
* @param dept
*/
public void setDept(Department dept) {
this.dept = dept;
}
/**
* 获取
* @return stus
*/
public ArrayList<Student> getStus() {
return stus;
}
/**
* 设置
* @param stus
*/
public void setStus(ArrayList<Student> stus) {
this.stus = stus;
}
public String toString() {
return "Teacher{id = " + id + ", name = " + name + ", tel = " + tel + ", email = " + email + ", dept = " + dept + ", stus = " + stus + "}";
}
}
Student
public class Student {
private Integer id;
private String name;
private String grade;
private School school;
public Student() {
}
public Student(Integer id, String name, String grade, School school) {
this.id = id;
this.name = name;
this.grade = grade;
this.school = school;
}
/**
* 获取
* @return id
*/
public Integer getId() {
return id;
}
/**
* 设置
* @param id
*/
public void setId(Integer id) {
this.id = id;
}
/**
* 获取
* @return name
*/
public String getName() {
return name;
}
/**
* 设置
* @param name
*/
public void setName(String name) {
this.name = name;
}
/**
* 获取
* @return grade
*/
public String getGrade() {
return grade;
}
/**
* 设置
* @param grade
*/
public void setGrade(String grade) {
this.grade = grade;
}
/**
* 获取
* @return school
*/
public School getSchool() {
return school;
}
/**
* 设置
* @param school
*/
public void setSchool(School school) {
this.school = school;
}
public String toString() {
return "Student{id = " + id + ", name = " + name + ", grade = " + grade + ", school = " + school + "}";
}
}
Department
public class Department {
private String name;
private String describe;
public Department() {
}
public Department(String name, String describe) {
this.name = name;
this.describe = describe;
}
/**
* 获取
* @return name
*/
public String getName() {
return name;
}
/**
* 设置
* @param name
*/
public void setName(String name) {
this.name = name;
}
/**
* 获取
* @return describe
*/
public String getDescribe() {
return describe;
}
/**
* 设置
* @param describe
*/
public void setDescribe(String describe) {
this.describe = describe;
}
public String toString() {
return "Department{name = " + name + ", describe = " + describe + "}";
}
}
School
public class School {
private String name;
private String leader;
private String address;
public School() {
}
public School(String name, String leader, String address) {
this.name = name;
this.leader = leader;
this.address = address;
}
/**
* 获取
* @return name
*/
public String getName() {
return name;
}
/**
* 设置
* @param name
*/
public void setName(String name) {
this.name = name;
}
/**
* 获取
* @return leader
*/
public String getLeader() {
return leader;
}
/**
* 设置
* @param leader
*/
public void setLeader(String leader) {
this.leader = leader;
}
/**
* 获取
* @return address
*/
public String getAddress() {
return address;
}
/**
* 设置
* @param address
*/
public void setAddress(String address) {
this.address = address;
}
public String toString() {
return "School{name = " + name + ", leader = " + leader + ", address = " + address + "}";
}
}
实例化对象
public static void main(String[] args) {
Teacher teacher = new Teacher();
School school1 = new School();
school1.setAddress("南京");
school1.setLeader("某某某");
school1.setName("南京大学");
School school2 = new School();
school2.setAddress("南京");
school2.setLeader("某某某");
school2.setName("南京大学");
School school3 = new School();
school3.setAddress("南京");
school3.setLeader("某某某");
school3.setName("南京大学");
Department department = new Department();
department.setName("学术部");
department.setDescribe("主要负责学术研究");
Student stu1 = new Student();
stu1.setGrade("大一");
stu1.setName("zs");
stu1.setId(1212);
stu1.setSchool(school1);
Student stu2 = new Student();
stu2.setGrade("大二");
stu2.setName("ww");
stu2.setId(4532);
stu2.setSchool(school2);
Student stu3 = new Student();
stu3.setGrade("大三");
stu3.setName("zq");
stu3.setId(9086);
stu3.setSchool(school3);
ArrayList<Student> students = new ArrayList<>();
students.add(stu1);
students.add(stu2);
students.add(stu3);
teacher.setDept(department);
teacher.setEmail("gree@edu.cn");
teacher.setId(79);
teacher.setName("gree");
teacher.setStus(students);
teacher.setTel("1111111111");
String jsonString = JSON.toJSONString(teacher);
System.out.println(jsonString);
}
数据结构
将数据保存到teacher.log文件中,并设置log_id来表示条数
解析数据
object JsonTeacher {
def main(args: Array[String]): Unit = {
val conf: SparkConf = new SparkConf().setAppName("retentionDemo").setMaster("local[*]")
val spark: SparkSession = SparkSession.builder().config(conf).getOrCreate()
val sc: SparkContext = spark.sparkContext
import org.apache.spark.sql.functions._
import spark.implicits._
val optinRDD: RDD[String] = sc.textFile("in/teacher.log")
optinRDD.foreach(println)
val frame: RDD[(String, String)] = optinRDD.map(
x => {
//返回第一个,所在的位置
val i: Int = x.indexOf(",")//1
//开始截取
//(0,i)--->(0,1)
//(i+1) 2 从下标元素开始到末尾
val tuple: (String, String) = (x.substring(0, i), x.substring(i + 1))
tuple
}
)
//将 RDD[String]转换为DataFrame
val frame1: DataFrame = frame.toDF("log_id", "value")
//将json类型数据按照需求进行切分
val frame2: DataFrame = frame1.select(
$"log_id",
get_json_object($"value","$.dept").as("dept"),
get_json_object($"value","$.email").as("teacher_email"),
get_json_object($"value","$.id").as("teacher_id"),
get_json_object($"value","$.name").as("teacher_name"),
get_json_object($"value","$.tel").as("teacher_tel"),
get_json_object($"value","$.stus").as("stus"),
)
frame2.printSchema()
frame2.show(false)
//将dept进行拆分
val frame3: DataFrame = frame2.select(
$"log_id", $"teacher_name", $"teacher_id", $"teacher_email", $"teacher_tel", $"stus",
get_json_object($"dept", "$.describe").as("dept_describe"),
get_json_object($"dept", "$.name").as("dept_name")
)
frame3.printSchema()
frame3.show(false)
//将stus拆分
val frame4: DataFrame = frame3.select(
$"log_id", $"teacher_name", $"teacher_id", $"teacher_email", $"teacher_tel", $"dept_describe", $"dept_name"
, from_json(
$"stus"
, ArrayType(
StructType(
StructField("grade", StringType) :: StructField("id", StringType) :: StructField("name", StringType) :: StructField("school", StringType) :: Nil
)
)
).as("stus")
)
frame4.printSchema()
frame4.show(false)
//使用炸裂explode函数将stus炸裂
val frame5: DataFrame = frame4.withColumn("stus", explode($"stus"))
frame5.printSchema()
frame5.show(false)
//将stus更名
val frame6: DataFrame = frame5
.withColumn("grade", $"stus.grade").as("grade")
.withColumn("id", $"stus.id").as("id")
.withColumn("name", $"stus.name").as("name")
.withColumn("school", $"stus.school").as("school")
.drop("stus")
frame6.printSchema()
frame6.show(false)
//将school拆分
val frame7: DataFrame = frame6.select(
$"log_id",$"teacher_name",$"teacher_id",$"teacher_email",$"teacher_tel",
$"dept_describe",$"dept_name",$"grade",$"id",$"name",
get_json_object($"school", "$.address").as("address"),
get_json_object($"school", "$.leader").as("leader"),
get_json_object($"school", "$.name").as("school_name")
)
frame7.show()
frame7.printSchema()
//写入数据库中,表名为table_json
JdbcUtils.dataFrameToMysql(frame7,table_json,1)
println("操作结束")
}
}
连接数据库工具类
object JdbcUtils {
val url = "jdbc:mysql://192.168.95.130:3306/logdemo?createDatabaseIfNotExist=true"
val dirver = "com.mysql.cj.jdbc.Driver"
val user = "root"
val password = "root"
//表名
val table_json="json_log"
val properties = new Properties()
properties.setProperty("user", JdbcUtils.user)
properties.setProperty("password", JdbcUtils.password)
properties.setProperty("driver", JdbcUtils.dirver)
def dataFrameToMysql(df: DataFrame, table: String, op: Int = 1): Unit = {
if (op == 0) {
df.write.mode(SaveMode.Append).jdbc(JdbcUtils.url, table, properties)
} else {
df.write.mode(SaveMode.Overwrite).jdbc(JdbcUtils.url, table, properties)
}
}
def getDataFrameByTableName(spark:SparkSession,table:String):DataFrame={
val frame: DataFrame = spark.read.jdbc(JdbcUtils.url, table,JdbcUtils.properties)
frame
}
}