Recently, while developing a Flink job that parses JSON, I ran into a nasty pitfall with play-json.
Error 1: play-json expects JSON numbers at /cpu and /memory/percent, but the input carries percentage strings like "0.00%" (which is why the custom Reads below map String to Float):
play.api.libs.json.JsResultException: JsResultException(errors:List((/cpu,List(JsonValidationError(List(error.expected.jsnumber),WrappedArray()))), (/memory/percent,List(JsonValidationError(List(error.expected.jsnumber),WrappedArray())))))
Error 2: once the custom Reads/Writes are defined in main and captured by the map closure, Flink's ClosureCleaner cannot serialize the closure:
at org.apache.flink.api.java.ClosureCleaner.clean(ClosureCleaner.java:99)
at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.clean(StreamExecutionEnvironment.java:1559)
at org.apache.flink.streaming.api.datastream.DataStream.clean(DataStream.java:185)
at org.apache.flink.streaming.api.datastream.DataStream.map(DataStream.java:585)
at org.apache.flink.streaming.api.scala.DataStream.map(DataStream.scala:634)
at com.linkaixin.scala.StreamingJob.main(StreamingJob.scala:123)
at com.linkaixin.scala.StreamingJob.main(StreamingJob.scala)
Caused by: java.io.NotSerializableException: play.api.libs.json.Reads$anon$6
at java.base/java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1185)
at java.base/java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1553)
at java.base/java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1510)
at java.base/java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1433)
at java.base/java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1179)
at java.base/java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:349)
at org.apache.flink.util.InstantiationUtil.serializeObject(InstantiationUtil.java:534)
at org.apache.flink.api.java.ClosureCleaner.clean(ClosureCleaner.java:81)
… 6 more
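For reference, this is the shape of code that triggers Error 2. It is a minimal sketch, not the original project's code (the job name and sample data are illustrative): the implicit Reads built in main is an anonymous, non-Serializable class, the map lambda captures it, and Flink's ClosureCleaner fails while serializing the closure.

```scala
import org.apache.flink.streaming.api.scala._
import play.api.libs.json._
import play.api.libs.functional.syntax._

case class Memory(raw: String, percent: Float)

object FailingJob {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Defined in main: the functional-builder Reads is an anonymous class
    // that does NOT implement java.io.Serializable...
    implicit val memoryReads: Reads[Memory] = (
      (JsPath \ "raw").read[String] and
      (JsPath \ "percent").read[String].map(_.replace("%", "").trim.toFloat / 100.0f)
    )(Memory.apply _)

    env.fromElements("""{"raw": "67.91MiB / 4GiB", "percent": "1.66%"}""")
      // ...and this lambda captures it, so ClosureCleaner throws
      // java.io.NotSerializableException: play.api.libs.json.Reads$anon$...
      .map(line => Json.parse(line).as[Memory])
      .print()

    env.execute("failing sketch")
  }
}
```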
The working code is below:
```scala
package com.linkaixin.scala
import java.util.{Date, Properties}
import org.apache.flink.api.common.functions.MapFunction
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import play.api.libs.json._
import play.api.libs.json.Reads._
import play.api.libs.functional.syntax._
/**
 * Skeleton for a Flink Streaming Job.
 *
 * For a tutorial how to write a Flink streaming application, check the
 * tutorials and examples on the Flink Website.
 *
 * To package your application into a JAR file for execution, run
 * 'mvn clean package' on the command line.
 *
 * If you change the name of the main class (with the public static void main(String[] args))
 * method, change the respective entry in the POM.xml file (simply search for 'mainClass').
 */
case class Memory(raw: String, percent: Float)
case class DockerContainerStatus(time: Date, ID: String, name: String, cpu: Float,
memory: Memory, netIO: String, blockIO: String, PIDs: String)
object StreamingJob {
def main(args: Array[String]) {
playJsonTest()
// Defining the implicits here, in main but outside the map function, does NOT work:
// implicit val memoryReads: Reads[Memory] = (
// (JsPath \ "raw").read[String] and
// (JsPath \ "percent").read[String].map(x=>x.replace("%","").trim.toFloat/100.0f)
// ) (Memory.apply _)
//
// implicit val memoryWrites = new Writes[Memory] {
// def writes(memory: Memory) = Json.obj(
// "raw" -> memory.raw,
// "percent" -> memory.percent
// )
// }
//
// implicit val dockerContainerStatusReads: Reads[DockerContainerStatus] = (
// (JsPath \ "time").read[Date] and
// (JsPath \ "ID").read[String] and
// (JsPath \ "name").read[String] and
// (JsPath \ "cpu").read[String].map(x=>x.replace("%","").trim.toFloat/100.0f) and
// (JsPath \ "memory").read[Memory] and
// (JsPath \ "netIO").read[String] and
// (JsPath \ "blockIO").read[String] and
// (JsPath \ "PIDs").read[String]) (DockerContainerStatus.apply _)
//
//
// implicit val dockerContainerStatusWrites = new Writes[DockerContainerStatus] {
// def writes(d: DockerContainerStatus) = Json.obj(
// "time" -> d.time,
// "ID" -> d.ID,
// "name" -> d.name,
// "cpu" -> d.cpu,
// "memory" -> d.memory,
// "netIO" -> d.netIO,
// "blockIO" -> d.blockIO,
// "PIDs" -> d.PIDs
// )
// }
// set up the streaming execution environment
val env = StreamExecutionEnvironment.getExecutionEnvironment
// use event time for the application
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
// configure watermark interval
env.getConfig.setAutoWatermarkInterval(5000L)
val properties = new Properties()
val hostname = "localhost"
//val hostname = "10.5.6.31"
properties.setProperty("bootstrap.servers", hostname + ":9092")
// only required for Kafka 0.8
properties.setProperty("zookeeper.connect", hostname + ":2181")
properties.setProperty("group.id", "test")
val kafkaQueueTopic = "log"
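// Note: setStartFromTimestamp(1) below makes the consumer start from the
// earliest records whose timestamps are >= 1 ms, i.e. effectively the whole topic.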
val sourceStream = new FlinkKafkaConsumer[String](kafkaQueueTopic, new SimpleStringSchema(), properties)
.setStartFromTimestamp(1)
//This must be defined, otherwise you get a warning: no implicits found for parameter formats: Formats, Manifest
//implicit val formats = DefaultFormats
// val dataStream = env.addSource[String](sourceStream).filter(s => {
// !s.contains("CONTAINER")
// })
val dataStream = env.addSource[String](sourceStream)
//
// // Even with lazy, this commented-out code did not help:
// lazy implicit val memoryFormat = Json.format[Memory]
// lazy implicit val dockerContainerStatusFormat = Json.format[DockerContainerStatus]
val dockerContainerStatusStream = dataStream.map(new MapFunction[String, DockerContainerStatus]() {
@throws[Exception]
override def map(input: String): DockerContainerStatus = {
//These must be defined right here, inside the map function.
implicit val memoryReads: Reads[Memory] = (
(JsPath \ "raw").read[String] and
(JsPath \ "percent").read[String].map(x=>x.replace("%","").trim.toFloat/100.0f)
) (Memory.apply _)
implicit val memoryWrites = new Writes[Memory] {
def writes(memory: Memory) = Json.obj(
"raw" -> memory.raw,
"percent" -> memory.percent
)
}
implicit val dockerContainerStatusReads: Reads[DockerContainerStatus] = (
(JsPath \ "time").read[Date] and
(JsPath \ "ID").read[String] and
(JsPath \ "name").read[String] and
(JsPath \ "cpu").read[String].map(x=>x.replace("%","").trim.toFloat/100.0f) and
(JsPath \ "memory").read[Memory] and
(JsPath \ "netIO").read[String] and
(JsPath \ "blockIO").read[String] and
(JsPath \ "PIDs").read[String]) (DockerContainerStatus.apply _)
implicit val dockerContainerStatusWrites = new Writes[DockerContainerStatus] {
def writes(d: DockerContainerStatus) = Json.obj(
"time" -> d.time,
"ID" -> d.ID,
"name" -> d.name,
"cpu".replaceAll("%", "").trim -> d.cpu,
"memory" -> d.memory,
"netIO" -> d.netIO,
"blockIO" -> d.blockIO,
"PIDs" -> d.PIDs
)
}
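// Strip the ANSI clear-screen escape sequence ("[2J[H", likely left over from a
// console tool such as docker stats) and the "%" signs before parsing.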
val jsonString = input.replaceAll("\\[2J\\[H", "").replace("%","").stripMargin
// val pos = jsonString.indexOf(",")
// print("pos=" + pos + " \n")
var dockerContainerStatus = new DockerContainerStatus(new Date, "0000", "aaaaa", 0.0f, new Memory("0MB", 0.0f), "0KB", "kkk", "222")
try {
val jsonObject = Json.parse(jsonString)
//print("标准格式的JSON="+jsonObject.toString()+"\n")
//val json2=Json.parse(json.toString())
//val result=jsonObject.validate[DockerContainerStatus]
//print("结果="+result.get+"\n")
//print("\n"+jsonString)
dockerContainerStatus = jsonObject.as[DockerContainerStatus]
} catch {
case ex: Exception => print("JSON parse failed: " + ex + " \njsonString=" + jsonString)
}
finally {
// your scala code here, such as to close a database connection
//print("最后....")
}
dockerContainerStatus
}
}
)
// dockerContainerStatusStream.print()
//dockerContainerStatusStream.keyBy(_.ID).reduce()
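// keyBy(1) keys the stream by field position 1 of the case class, i.e. the ID field;
// keyBy("ID") or keyBy(_.ID) would be equivalent and more readable.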
dockerContainerStatusStream.keyBy(1).print()
//dockerContainerStatusStream.print()
/*
* Here, you can start creating your execution plan for Flink.
*
* Start with getting some data from the environment, like
* env.readTextFile(textPath);
*
* then, transform the resulting DataStream[String] using operations
* like
* .filter()
* .flatMap()
* .join()
* .group()
*
* and many more.
* Have a look at the programming guide:
*
* http://flink.apache.org/docs/latest/apis/streaming/index.html
*
*/
//2. create test data
env.execute("Flink Streaming Scala API Skeleton")
}
def playJsonTest() = {
implicit val memoryReads: Reads[Memory] = (
(JsPath \ "raw").read[String] and
(JsPath \ "percent").read[String].map(x=>x.replace("%","").trim.toFloat/100.0f)
) (Memory.apply _)
implicit val memoryWrites = new Writes[Memory] {
def writes(memory: Memory) = Json.obj(
"raw" -> memory.raw,
"percent" -> memory.percent
)
}
implicit val dockerContainerStatusReads: Reads[DockerContainerStatus] = (
(JsPath \ "time").read[Date] and
(JsPath \ "ID").read[String] and
(JsPath \ "name").read[String] and
(JsPath \ "cpu").read[String].map(x=>x.replace("%","").trim.toFloat/100.0f) and
(JsPath \ "memory").read[Memory] and
(JsPath \ "netIO").read[String] and
(JsPath \ "blockIO").read[String] and
(JsPath \ "PIDs").read[String]) (DockerContainerStatus.apply _)
implicit val dockerContainerStatusWrites = new Writes[DockerContainerStatus] {
def writes(d: DockerContainerStatus) = Json.obj(
"time" -> d.time,
"ID" -> d.ID,
"name" -> d.name,
"cpu".replaceAll("%", "").trim -> d.cpu,
"memory" -> d.memory,
"netIO" -> d.netIO,
"blockIO" -> d.blockIO,
"PIDs" -> d.PIDs
)
}
val k=
"""
{
"time":"2019-07-15 22:54:34",
"ID": "62fecefa76c2",
"name": "mesos-e2bee601-9b8d-48e1-9711-df00e8b8e957",
"cpu": "0.00%",
"memory": {"raw": "67.91MiB / 4GiB","percent": "1.66%"},
"netIO": "27.9kB / 28.8kB",
"blockIO": "1.37MB / 81.9kB",
"PIDs": "8"
}
""".stripMargin
val json=Json.parse(k)
//val p=json.asOpt[DockerContainerStatus]
val dockerContainerStatus=json.as[DockerContainerStatus]
print("JSON 格式验证通过,play-json工作正常!"+dockerContainerStatus)
}
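// A sketch of a more defensive alternative to as[...] wrapped in try/catch:
// validate returns a JsResult that can be pattern matched without throwing.
// (Illustrative only; it uses the same implicits as playJsonTest above.)
//
// json.validate[DockerContainerStatus] match {
//   case JsSuccess(status, _) => print(status)
//   case JsError(errors)      => print("validation failed: " + errors)
// }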
// def t1() = {
// implicit val memoryFormat = Json.format[Memory]
// implicit val dockerContainerStatusFormat = Json.format[DockerContainerStatus]
// val k =
// """
// {
// "time":"2019-07-15 22:54:34",
// "ID": "62fecefa76c2",
// "name": "mesos-e2bee601-9b8d-48e1-9711-df00e8b8e957",
// "cpu": "0.00%",
// "memory": {"raw": "67.91MiB / 4GiB","percent": "1.66%"},
// "netIO": "27.9kB / 28.8kB",
// "blockIO": "1.37MB / 81.9kB",
// "PIDs": "8"
// }
// """.stripMargin
// val json=Json.parse(k)
// val dockerContainerStatus=json.as[DockerContainerStatus]
// print(dockerContainerStatus)
// }
}
```

The root cause: at runtime, Flink could not find serializable `Reads`/`Writes` (serialization/deserialization) instances for the two case classes below. The anonymous play-json readers created in `main` are not `java.io.Serializable`, so the map closure that captured them could not be shipped to the cluster. Moving the `reads` and `writes` definitions into the same function that does the parsing solved it.

```scala
case class Memory(raw: String, percent: Float)
case class DockerContainerStatus(time: Date, ID: String, name: String, cpu: Float,
memory: Memory, netIO: String, blockIO: String, PIDs: String)
```

The `Reads`/`Writes` implementations for these two classes must be defined inside the Flink map function:

```scala
//These must be defined right here, inside the map function.
implicit val memoryReads: Reads[Memory] = (
(JsPath \ "raw").read[String] and
(JsPath \ "percent").read[String].map(x=>x.replace("%","").trim.toFloat/100.0f)
) (Memory.apply _)
implicit val memoryWrites = new Writes[Memory] {
def writes(memory: Memory) = Json.obj(
"raw" -> memory.raw,
"percent" -> memory.percent
)
}
implicit val dockerContainerStatusReads: Reads[DockerContainerStatus] = (
(JsPath \ "time").read[Date] and
(JsPath \ "ID").read[String] and
(JsPath \ "name").read[String] and
(JsPath \ "cpu").read[String].map(x=>x.replace("%","").trim.toFloat/100.0f) and
(JsPath \ "memory").read[Memory] and
(JsPath \ "netIO").read[String] and
(JsPath \ "blockIO").read[String] and
(JsPath \ "PIDs").read[String]) (DockerContainerStatus.apply _)
implicit val dockerContainerStatusWrites = new Writes[DockerContainerStatus] {
def writes(d: DockerContainerStatus) = Json.obj(
"time" -> d.time,
"ID" -> d.ID,
"name" -> d.name,
"cpu".replaceAll("%", "").trim -> d.cpu,
"memory" -> d.memory,
"netIO" -> d.netIO,
"blockIO" -> d.blockIO,
"PIDs" -> d.PIDs
)
}
```
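One alternative worth knowing (my addition, not part of the original fix): defining the implicits inside map() sidesteps the serialization problem, but it rebuilds every Reads/Writes object once per record. Putting them in a standalone object should avoid both issues, since a Scala object is referenced statically from the closure (nothing gets captured or serialized) and is initialized once per JVM. A minimal sketch, assuming the same Memory case class as above (DockerContainerStatus would be handled the same way):

```scala
import play.api.libs.json._
import play.api.libs.functional.syntax._

// Holds the play-json instances; they are reached through a static reference,
// so the map closure never captures (or tries to serialize) the
// non-serializable anonymous Reads.
object DockerJsonFormats {
  implicit val memoryReads: Reads[Memory] = (
    (JsPath \ "raw").read[String] and
    (JsPath \ "percent").read[String].map(_.replace("%", "").trim.toFloat / 100.0f)
  )(Memory.apply _)
}

// Inside the Flink operator:
// import DockerJsonFormats._
// val memory = Json.parse(line).as[Memory]
```

Another option in the same spirit is a RichMapFunction holding the Reads in a `@transient lazy val`, so it is rebuilt lazily on each task manager after deserialization instead of once per record.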