目录
整个项目我们要做的事情:
创建父工程
父工程Module:gmall1015
给父工程添加依赖:
<groupId>org.example</groupId>
<artifactId>gmall1015</artifactId>
<packaging>pom</packaging>
<version>1.0-SNAPSHOT</version>
<modules>
<module>gmall-common</module>
<module>gmall-mock</module>
</modules>
<properties>
<spark.version>2.1.1</spark.version>
<scala.version>2.11.8</scala.version>
<log4j.version>1.2.17</log4j.version>
<slf4j.version>1.7.22</slf4j.version>
<fastjson.version>1.2.47</fastjson.version>
<httpclient.version>4.5.5</httpclient.version>
<httpmime.version>4.3.6</httpmime.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<java.version>1.8</java.version>
</properties>
<dependencies>
<!--此处放日志包,所有项目都要引用-->
<!-- 所有子项目的日志框架 -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
<version>${slf4j.version}</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>${slf4j.version}</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>${slf4j.version}</version>
</dependency>
<!-- 具体的日志实现 -->
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>${log4j.version}</version>
</dependency>
</dependencies>
<dependencyManagement>
<dependencies>
<!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>${httpclient.version}</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpmime</artifactId>
<version>${httpmime.version}</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${fastjson.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
</dependencies>
</dependencyManagement>
<build>
<plugins>
<!-- 该插件用于将Scala代码编译成class文件 -->
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.4.6</version>
<executions>
<execution>
<!-- 声明绑定到maven的compile阶段 -->
<goals>
<goal>compile</goal>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
创建 common 子模块
模块名
<artifactId>gmall-common</artifactId>
给 common 子模块添加依赖
<dependencies>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpmime</artifactId>
</dependency>
</dependencies>
说明:
• 这个模块内放一些通用代码. 以后用到其他依赖再添加
创建 mock 子模块
本模块用来生成模拟数据
模块名:
<artifactId>gmall-mock</artifactId>
给 mock 子模块添加依赖
<dependencies>
<dependency>
<groupId>org.example</groupId>
<artifactId>gmall-common</artifactId>
<version>1.0-SNAPSHOT</version>
</dependency>
</dependencies>
工具类: RandomNumUtil
生成随机整数的工具类
创建包: com.donglin.gmall.mock.util
package com.donglin.gmall.mock.util
import java.util.Random
import scala.collection.mutable
/**
 * Utility for generating random integers.
 *
 * All helpers draw from the single shared [[java.util.Random]] instance `random`.
 */
object RandomNumUtil {
  /** Shared generator used by every helper in this object. */
  val random = new Random()

  /**
   * Returns a random Int in the closed range [from, to].
   *
   * @param from lower bound (inclusive)
   * @param to   upper bound (inclusive)
   * @return a value v with from <= v <= to
   * @throws IllegalArgumentException if from > to
   */
  def randomInt(from: Int, to: Int): Int = {
    if (from > to) throw new IllegalArgumentException(s"from = $from 应该小于 to = $to")
    // Width is computed as a Long: `to - from + 1` overflows Int for wide ranges
    // (e.g. [0, Int.MaxValue]) and would make nextInt reject its bound.
    val span: Long = to.toLong - from.toLong + 1L
    if (span <= Int.MaxValue) random.nextInt(span.toInt) + from
    else (from + Math.floorMod(random.nextLong(), span)).toInt
  }

  /**
   * Returns a random Long in the closed range [from, to].
   *
   * @param from lower bound (inclusive)
   * @param to   upper bound (inclusive)
   * @return a value v with from <= v <= to
   * @throws IllegalArgumentException if from > to
   */
  def randomLong(from: Long, to: Long): Long = {
    if (from > to) throw new IllegalArgumentException(s"from = $from 应该小于 to = $to")
    val span = to - from + 1
    if (span > 0) {
      // floorMod is never negative, unlike `abs % span`, because
      // Long.MinValue.abs is still Long.MinValue.
      Math.floorMod(random.nextLong(), span) + from
    } else {
      // The width does not fit in a Long, so the range covers at least half of
      // all Long values: draw until a value lands inside it.
      Iterator.continually(random.nextLong()).dropWhile(v => v < from || v > to).next()
    }
  }

  /**
   * Generates a list of random Ints, each in the closed range [from, to].
   *
   * @param from      lower bound (inclusive)
   * @param to        upper bound (inclusive)
   * @param count     how many values to produce; a non-positive count yields an empty list
   * @param canRepeat whether the same value may appear more than once
   * @return the generated values; when canRepeat is false they are pairwise distinct
   * @throws IllegalArgumentException if from > to while values are requested, or if
   *                                  canRepeat is false and count exceeds the number of
   *                                  distinct values in the range
   */
  def randomMultiInt(from: Int, to: Int, count: Int, canRepeat: Boolean = true): List[Int] = {
    if (canRepeat) {
      List.fill(count)(randomInt(from, to))
    } else {
      val span: Long = to.toLong - from.toLong + 1L
      // Without this guard the loop below could never collect enough distinct values.
      if (span > 0 && count > span) {
        throw new IllegalArgumentException(
          s"cannot draw $count distinct values from [$from, $to]: only $span available")
      }
      val picked: mutable.Set[Int] = mutable.Set[Int]()
      while (picked.size < count) {
        picked += randomInt(from, to)
      }
      picked.toList
    }
  }

  /** Manual smoke test: prints one repeating and one non-repeating sample. */
  def main(args: Array[String]): Unit = {
    println(randomMultiInt(1, 15, 10))
    println(randomMultiInt(1, 8, 6, canRepeat = false))
  }
}
工具类: RandomOptions
按照一定的分布生成随机选项的工具类
package com.donglin.gmall.mock.util
import scala.collection.mutable.ListBuffer
/**
 * Factory for RandomOptions instances built from (value, weight) pairs.
 * A value with weight w is stored w times, so getRandomOption on the result
 * returns it with probability w / (sum of all weights).
 */
object RandomOptions {
  /**
   * Builds a RandomOptions from the given weighted values.
   *
   * @param opts (value, weight) pairs; every weight must be >= 0
   * @return a RandomOptions whose option list holds each value `weight` times
   * @throws IllegalArgumentException if any weight is negative
   */
  def apply[T](opts: (T, Int)*): RandomOptions[T] = {
    // A negative weight would lower totalWeight without adding any entries,
    // which silently makes the later options unreachable.
    opts.foreach { case (value, weight) =>
      require(weight >= 0, s"weight for $value must not be negative, but was $weight")
    }
    val randomOptions = new RandomOptions[T]()
    randomOptions.totalWeight = opts.map(_._2).sum // total of all weights
    opts.foreach {
      // e.g. ("张三", 10) contributes ten entries
      case (value, weight) => randomOptions.options ++= List.fill(weight)(value)
    }
    randomOptions
  }

  /** Manual smoke test: prints five weighted picks. */
  def main(args: Array[String]): Unit = {
    val opts = RandomOptions(("张三", 10), ("李四", 30), ("ww", 20))
    println(opts.getRandomOption())
    println(opts.getRandomOption())
    println(opts.getRandomOption())
    println(opts.getRandomOption())
    println(opts.getRandomOption())
  }
}
// Example weighting: engineer 10, programmer 10, teacher 20
/**
 * Holds a weighted list of values: each value appears in `options` once per
 * unit of weight, so a uniform pick from the list is a weighted pick of values.
 */
class RandomOptions[T] {
  /** Sum of all weights, as set by the companion's apply. */
  var totalWeight: Int = _
  /** The expanded option list to draw from. */
  var options = ListBuffer[T]()

  /**
   * Returns one randomly chosen entry from `options`.
   *
   * @return a uniformly chosen element of the expanded option list
   * @throws IllegalArgumentException if no options have been added
   */
  def getRandomOption(): T = {
    require(options.nonEmpty, "RandomOptions has no options to choose from")
    // Index by the buffer's real size: totalWeight is a separately mutable
    // field and may not match the number of stored entries.
    options(RandomNumUtil.randomInt(0, options.size - 1))
  }
}
日志发送工具类: LogUpload
向服务器发送生成的日志的工具类
package com.donglin.gmall.mock.util
import java.io.OutputStream
import java.net.{HttpURLConnection, URL}
/**
 * Sends generated log records to the log server over HTTP.
 */
object LogUpload {
  import java.net.URLEncoder
  import java.nio.charset.StandardCharsets

  /**
   * Posts one log record to the log server as the form field `log` and prints
   * the HTTP response code. Failures are printed to stderr and not rethrown,
   * so a failed upload never aborts the caller.
   *
   * @param log the log record to upload
   */
  def sendLog(log: String): Unit = {
    try {
      // 1. Address of the log server
      val logUrl = new URL("http://hadoop12:8080/log")
      // 2. Open an HttpURLConnection to it
      val conn: HttpURLConnection = logUrl.openConnection().asInstanceOf[HttpURLConnection]
      try {
        // 3. Uploads use POST
        conn.setRequestMethod("POST")
        // 4. Client timestamp, so the server can correct for clock differences
        conn.setRequestProperty("clientTime", System.currentTimeMillis.toString)
        // 5. Allow a request body
        conn.setDoOutput(true)
        // 6. The body is form data
        conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded")
        // 7. Form values must be URL-encoded, otherwise characters such as
        //    '&', '+', '=' or '%' inside the log would corrupt the field.
        //    The charset is fixed so the bytes do not depend on the platform default.
        val body: Array[Byte] =
          ("log=" + URLEncoder.encode(log, "UTF-8")).getBytes(StandardCharsets.UTF_8)
        // 8. Write the body; the stream is closed even if writing fails
        val out: OutputStream = conn.getOutputStream
        try {
          out.write(body)
          out.flush()
        } finally {
          out.close()
        }
        // 9. Read the response code; the source notes that without reading the
        //    response the request is not actually sent to the server
        val code: Int = conn.getResponseCode
        println(code)
      } finally {
        // 10. Release the connection on success and on failure
        conn.disconnect()
      }
    } catch {
      case e: Exception => e.printStackTrace()
    }
  }
}
生成日志: JsonMock
生成模拟数据
package com.donglin.gmall.mock
import java.util.Date
import com.alibaba.fastjson.{JSON, JSONObject}
import com.donglin.gmall.mock.util.{LogUpload, RandomNumUtil, RandomOptions}
/**
 * Generates mock startup and event logs as JSON strings, prints them and
 * uploads them through LogUpload.
 */
object JsonMock {
  // Number of startup logs to generate
  val startupNum = 100000
  // Number of event logs to generate (not read anywhere in this object)
  val eventNum = 200000
  // Operating system distribution
  val osOpts = RandomOptions(("ios", 3), ("android", 7))
  // Log start time (not read anywhere in this object)
  var startDate: Date = _
  // Log end time (not read anywhere in this object)
  var endDate: Date = _
  // Geographic area distribution
  val areaOpts = RandomOptions(
    ("beijing", 20), ("shanghai", 20), ("guangdong", 20),
    ("hebei", 5), ("heilongjiang", 5), ("shandong", 5),
    ("tianjin", 5), ("guizhou", 5), ("shangxi", 5),
    ("sichuan", 5), ("xinjiang", 5)
  )
  // Application id written into every log
  val appId = "gmall1015"
  // App version distribution
  val versionOpts = RandomOptions(
    ("1.2.0", 50), ("1.1.2", 15),
    ("1.1.3", 30), ("1.1.1", 5))
  // User behaviour (event) distribution
  val eventOpts = RandomOptions(
    ("addFavor", 10), ("addComment", 30),
    ("addCart", 20), ("clickItem", 40), ("coupon", 120))
  // App distribution channel distribution
  val channelOpts = RandomOptions(
    ("xiaomi", 10), ("huawei", 20), ("wandoujia", 30),
    ("360", 20), ("tencent", 20), ("baidu", 10), ("website", 10))
  // Whether to stop emitting event logs for the current startup
  val quitOpts = RandomOptions((true, 50), (false, 95))

  /**
   * Builds one mock startup log.
   *
   * Fields written:
   *  - logType: log type, always "startup"
   *  - mid:     device identifier
   *  - uid:     user identifier
   *  - os:      operating system
   *  - appId:   application id
   *  - area:    area
   *  - channel: distribution channel
   *  - version: app version
   *
   * No timestamp is written: client clocks are unreliable, so the source notes
   * that `ts` is to be generated on the server side.
   *
   * @return the startup log as a JSON string
   */
  def initOneStartupLog(): String = {
    val mid: String = "mid_" + RandomNumUtil.randomInt(1, 10)
    val uid: String = "uid_" + RandomNumUtil.randomInt(1, 10000)
    val os: String = osOpts.getRandomOption()
    val area: String = areaOpts.getRandomOption()
    val version: String = versionOpts.getRandomOption()
    val channel: String = channelOpts.getRandomOption()

    val obj = new JSONObject()
    obj.put("logType", "startup")
    obj.put("mid", mid)
    obj.put("uid", uid)
    obj.put("os", os)
    obj.put("appId", this.appId)
    obj.put("area", area)
    obj.put("channel", channel)
    obj.put("version", version)
    // Return the log as a JSON string
    obj.toJSONString
  }

  /**
   * Builds one mock event log that belongs to the given startup log.
   *
   * Fields written:
   *  - logType:    log type, always "event"
   *  - mid, uid, os, area: copied from the startup log
   *  - appId:      application id
   *  - eventId:    event id
   *  - pageId:     current page
   *  - nextPageId: page navigated to
   *  - itemId:     item number
   *
   * @param startupLogJson a startup log in JSON form, as produced by initOneStartupLog
   * @return the event log as a JSON string
   */
  def initOneEventLog(startupLogJson: String): String = {
    val startupLogObj: JSONObject = JSON.parseObject(startupLogJson)
    val eventLogObj = new JSONObject()
    eventLogObj.put("logType", "event")
    eventLogObj.put("mid", startupLogObj.getString("mid"))
    eventLogObj.put("uid", startupLogObj.getString("uid"))
    eventLogObj.put("os", startupLogObj.getString("os"))
    eventLogObj.put("appId", this.appId)
    eventLogObj.put("area", startupLogObj.getString("area"))
    eventLogObj.put("eventId", eventOpts.getRandomOption())
    eventLogObj.put("pageId", RandomNumUtil.randomInt(1, 50))
    eventLogObj.put("nextPageId", RandomNumUtil.randomInt(1, 50))
    eventLogObj.put("itemId", RandomNumUtil.randomInt(1, 50))
    eventLogObj.toJSONString
  }

  /**
   * Generates and uploads the mock logs: `startupNum` startup logs, each
   * followed by event logs until quitOpts yields true. Sleeps 100 ms after
   * every event log and 1000 ms after every startup round.
   */
  def generateLog(): Unit = {
    // 1 to startupNum: exactly startupNum rounds (0 to startupNum ran one extra).
    (1 to startupNum).foreach { _ =>
      // Build, print and upload one startup log
      val oneStartupLog: String = initOneStartupLog()
      println(oneStartupLog)
      LogUpload.sendLog(oneStartupLog)
      // Emit event logs for this startup until the quit option comes up
      while (!quitOpts.getRandomOption()) {
        val oneEventLog: String = initOneEventLog(oneStartupLog)
        LogUpload.sendLog(oneEventLog)
        println(oneEventLog)
        Thread.sleep(100)
      }
      Thread.sleep(1000)
    }
  }

  /** Entry point: runs the log generator. */
  def main(args: Array[String]): Unit = {
    generateLog()
  }
}
如果只想在本地验证日志能否正常生成(不向日志服务器发送), 可以把 JsonMock 中下面两行发送语句注释掉, 然后运行:
LogUpload.sendLog(oneStartupLog)
LogUpload.sendLog(oneEventLog)