// Computes page views (pv) and unique visitors (uv) per (field 1, field 2) group
// from tab-separated input lines and renders each group as
// "<field1>\t<field2>,\t<pv>,<uv>".
//
// Input layout as used below (0-based, tab-separated):
//   0 - visitor identifier (presumably a user/device id; verify against the producer)
//   1 - group id; only values starting with "2_" are kept (called "sid" in the original comment)
//   2 - second grouping field (the original comment suggests "os,sv"; TODO confirm)
//   3 - comma-separated list, only checked by the filter
//   4 - page-view count for the line, parsed as an integer
rdd
  // Split every line exactly once instead of re-splitting it for each field access.
  .map(_.split("\t"))
  .filter { fields =>
    // Lines with fewer than five fields are dropped here; previously they
    // crashed the job with ArrayIndexOutOfBoundsException in the next stage.
    fields.length > 4 &&
    fields(1).startsWith("2_") &&
    // NOTE(review): String.split drops trailing empty strings, so this is only
    // false when field 3 consists solely of commas. If the intent was "field 3
    // is non-empty", this condition needs to change; kept as-is to preserve
    // the original selection.
    fields(3).split(",").length > 0
  }
  // Key by ((group id, second field), visitor) so the first reduce collapses all
  // lines of one visitor within one group. A tuple key replaces the former
  // tab-joined string key, which had to be split again afterwards.
  // Long is used for the counter so large sums do not silently overflow Int.
  // A non-numeric field 4 still fails fast with NumberFormatException.
  .map(fields => (((fields(1), fields(2)), fields(0)), fields(4).toLong))
  .reduceByKey(_ + _)
  // Drop the visitor from the key; each remaining record stands for one
  // distinct visitor of the group, hence the uv contribution of 1.
  .map { case ((group, _), pv) => (group, (pv, 1)) }
  .reduceByKey((a, b) => (a._1 + b._1, a._2 + b._2))
  // Output format: sid <TAB> os,sv, <TAB> pv,uv
  .map { case ((sid, os), (pv, uv)) => s"$sid\t$os,\t$pv,$uv" }
// spark算子算pv、uv
// 最新推荐文章于 2022-07-22 14:16:13 发布