大数据日志分析项目mapreduce程序

总体思路:
使用 Flume 将服务器上的日志传到 Hadoop 上,然后使用 MapReduce 程序完成数据清洗,并统计 PV 和 visit 模型,最后使用 Azkaban 定时执行这些程序。
用户的每一次登录(访问)根据 session 来区分。
以下程序本人已亲自测试,可以正常使用。
原始日志字段说明(字段之间以制表符/空白分隔,部分字段可能为空):id,方法中文说明,登录人name,登录时间(毫秒时间戳),操作耗时(毫秒),请求路径1,请求路径2,请求全路径,请求方式(get/post),请求参数,浏览器信息,用户ip地址,请求页面(或返回结果),权限值,用户session
原始日志如下:

95367   后台首页    sw2 1529919971466   21  http://upms.zhangshuzheng.cn:1111   /manage/index   http://upms.zhangshuzheng.cn:1111/manage/index  GET     Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   "/manage/index.jsp"     548a66a9-e89c-401b-b1f0-503357ce72ae                
95366   登录  sw2 1529919971322   50  http://upms.zhangshuzheng.cn:1111   /sso/login  http://upms.zhangshuzheng.cn:1111/sso/login POST    {validateCode=[2GRQ],password=[12345],rememberMe=[false],backurl=[],username=[sw2]} Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   {"code":1,"data":"http://upms.zhangshuzheng.cn:1111","message":"success"}       548a66a9-e89c-401b-b1f0-503357ce72ae                
95365   登录      1529919964249   0   http://upms.zhangshuzheng.cn:1111   /sso/login  http://upms.zhangshuzheng.cn:1111/sso/login POST    {validateCode=[FDEY],password=[12345],rememberMe=[false],backurl=[],username=[sw2]} Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   {"code":10107,"data":"请更换验证码!","message":"ValidateCode error"}      548a66a9-e89c-401b-b1f0-503357ce72ae                
95364   登录      1529919670205   2   http://upms.zhangshuzheng.cn:1111   /sso/login  http://upms.zhangshuzheng.cn:1111/sso/login GET     Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   "/sso/login.jsp"        f1124085-8fdb-45e8-9a01-716153d24b11                
95363   退出登录        1529919670085   47  http://upms.zhangshuzheng.cn:1111   /sso/logout http://upms.zhangshuzheng.cn:1111/sso/logout    GET     Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   "redirect:http://upms.zhangshuzheng.cn:1111/manage/index"       2837e087-0958-4e47-ac4a-c94441199deb                
95362   查询字典    lzh 1529919651268   19  http://upms.zhangshuzheng.cn:1111   /manage/dictionary/select/sys   http://upms.zhangshuzheng.cn:1111/manage/dictionary/select/sys  GET sort=pkId&order=asc&offset=0&limit=50   Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   {"total":13,"rows":[{"code":"sys","ctime":1521769310000,"description":"系统","fatherCode":"sys","fatherDesc":"系统","pkId":3},{"code":"mg","ctime":1522120361000,"description":"密级","fatherCode":"sys","fatherDesc":"sys","pkId":33,"remarks":"档案秘密级别"},{"code":"preservationDate","ctime":1522121226000,"description":"保存期限","fatherCode":"sys","fatherDesc":"sys","pkId":40,"remarks":"设置文档的保存期限"},{"code":"tradition","ctime":1522132680000,"description":"传统归档","fatherCode":"sys","fatherDesc":"sys","pkId":44,"remarks":"传统归档"},{"code":"comArticle","ctime":1522134595000,"description":"来文","fatherCode":"sys","fatherDesc":"sys","pkId":51,"remarks":"简化整理--来文"},{"code":"sendArticle","ctime":1522135517000,"description":"发文","fatherCode":"sys","fatherDesc":"sys","pkId":56,"remarks":"简化整理--发文"},{"code":"innerArticle","ctime":1522137766000,"description":"内部文件","fatherCode":"sys","fatherDesc":"sys","pkId":63,"remarks":"简化整理--内部文件"},{"code":"singleArchive","ctime":1522139048000,"description":"单件","fatherCode":"sys","fatherDesc":"sys","pkId":71,"remarks":"简化管理--单件"},{"code":"separator","ctime":1522216114000,"description":"分隔符","fatherCode":"sys","fatherDesc":"sys","pkId":78,"remarks":"特殊字符符号"},{"code":"carrierType","ctime":1522380386000,"description":"载体类型","fatherCode":"sys","fatherDesc":"sys","pkId":94,"remarks":"档案的载体"},{"code":"archiveSource","ctime":1522381316000,"description":"档案来源","fatherCode":"sys","fatherDesc":"sys","pkId":98,"remarks":"档案的出处"},{"code":"abbreviation","ctime":1523347840000,"description":"门类简称","fatherCode":"sys","fatherDesc":"sys","pkId":109,"remarks":"门类号的简称"},{"code":"activitiCode","ctime":1524106881000,"description":"工作流定义","fathe
rCode":"sys","fatherDesc":"sys","pkId":120,"remarks":"应用于本项目的所有工作流"}]}  upms:dictionary:select  2837e087-0958-4e47-ac4a-c94441199deb                
95361   查询字典不分页 lzh 1529919651241   16  http://upms.zhangshuzheng.cn:1111   /manage/dictionary/selectNoPagination/sys   http://upms.zhangshuzheng.cn:1111/manage/dictionary/selectNoPagination/sys  GET     Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   {"total":13,"rows":[{"code":"mg","ctime":1522120361000,"description":"密级","fatherCode":"sys","fatherDesc":"sys","pkId":33,"remarks":"档案秘密级别"},{"code":"preservationDate","ctime":1522121226000,"description":"保存期限","fatherCode":"sys","fatherDesc":"sys","pkId":40,"remarks":"设置文档的保存期限"},{"code":"tradition","ctime":1522132680000,"description":"传统归档","fatherCode":"sys","fatherDesc":"sys","pkId":44,"remarks":"传统归档"},{"code":"comArticle","ctime":1522134595000,"description":"来文","fatherCode":"sys","fatherDesc":"sys","pkId":51,"remarks":"简化整理--来文"},{"code":"sendArticle","ctime":1522135517000,"description":"发文","fatherCode":"sys","fatherDesc":"sys","pkId":56,"remarks":"简化整理--发文"},{"code":"innerArticle","ctime":1522137766000,"description":"内部文件","fatherCode":"sys","fatherDesc":"sys","pkId":63,"remarks":"简化整理--内部文件"},{"code":"carrierType","ctime":1522380386000,"description":"载体类型","fatherCode":"sys","fatherDesc":"sys","pkId":94,"remarks":"档案的载体"},{"code":"separator","ctime":1522216114000,"description":"分隔符","fatherCode":"sys","fatherDesc":"sys","pkId":78,"remarks":"特殊字符符号"},{"code":"activitiCode","ctime":1524106881000,"description":"工作流定义","fatherCode":"sys","fatherDesc":"sys","pkId":120,"remarks":"应用于本项目的所有工作流"},{"code":"sys","ctime":1521769310000,"description":"系统","fatherCode":"sys","fatherDesc":"系统","pkId":3},{"code":"abbreviation","ctime":1523347840000,"description":"门类简称","fatherCode":"sys","fatherDesc":"sys","pkId":109,"remarks":"门类号的简称"},{"code":"singleArchive","ctime":1522139048000,"description":"单件","fatherCode":"sys","fatherDesc":"sys","pkId":71,"remarks":"简化管理--单件"},{"code":"archiveSource","ctime":1522381316000,"description":"档案来源","fatherCod
e":"sys","fatherDesc":"sys","pkId":98,"remarks":"档案的出处"}]}  upms:dictionary:selectNoPagination  2837e087-0958-4e47-ac4a-c94441199deb                
95360   字典首页    lzh 1529919650618   10  http://upms.zhangshuzheng.cn:1111   /manage/dictionary/index    http://upms.zhangshuzheng.cn:1111/manage/dictionary/index   GET     Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   "/manage/dictionary/index.jsp"  upms:dictionary:read    2837e087-0958-4e47-ac4a-c94441199deb                
95359   全宗列表    lzh 1529919646915   64  http://upms.zhangshuzheng.cn:1111   /manage/fonds/list  http://upms.zhangshuzheng.cn:1111/manage/fonds/list POST    {}  Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   [{"ctime":1524758400000,"fondsId":13,"fondsName":"占地方","fondsNum":"000","mtime":1525190400000,"str1":"0","str2":"1"},{"ctime":1523462400000,"fondsId":1,"fondsName":"ZYP测试","fondsNum":"000","mtime":1525190400000,"str1":"0","str2":"0"},{"ctime":1525190400000,"fondsId":19,"fondsName":"lzh测试2","fondsNum":"001","str1":"0","str2":"1"},{"ctime":1523462400000,"fondsId":4,"fondsName":"301","fondsNum":"002","mtime":1524758400000,"str1":"0","str2":"1"},{"ctime":1523462400000,"fondsId":2,"fondsName":"ZYP测试","fondsNum":"003","mtime":1524758400000,"str1":"0","str2":"1"},{"ctime":1523462400000,"fondsId":5,"fondsName":"ZXY测试","fondsNum":"004","str1":"0","str2":"0"},{"ctime":1523462400000,"fondsId":6,"fondsName":"WXL测试","fondsNum":"005","str1":"0","str2":"0"},{"ctime":1523462400000,"fondsId":7,"fondsName":"SW测试","fondsNum":"006","str1":"0","str2":"1"},{"ctime":1523462400000,"fondsId":8,"fondsName":"LZH测试2","fondsNum":"007","str1":"0","str2":"0"},{"ctime":1524758400000,"fondsId":14,"fondsName":"1","fondsNum":"008","mtime":1524758400000,"str1":"0","str2":"1"},{"ctime":1524758400000,"fondsId":16,"fondsName":"123123","fondsNum":"011","str1":"0","str2":"1"},{"ctime":1524758400000,"fondsId":15,"fondsName":"2","fondsNum":"012","mtime":1524758400000,"str1":"0","str2":"1"},{"ctime":1526313600000,"fondsId":21,"fondsName":"测试99","fondsNum":"099","str1":"0"},{"ctime":1525190400000,"fondsId":20,"fondsName":"lzh测试","fondsNum":"60","mtime":1525190400000,"str1":"0","str2":"1"},{"ctime":1523894400000,"fondsId":9,"fondsName":"innoking","fondsNum":"YNJY","str1":"0","str2":"1"}]        2837e087-0958-4e47-ac4a-c94441199deb                
95358   查询保管年限  lzh 1529919646747   69  http://upms.zhangshuzheng.cn:1111   /manage/scope/preservationDate  http://upms.zhangshuzheng.cn:1111/manage/scope/preservationDate GET code=preservationDate   Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   [{"code":"D","ctime":1522121323000,"description":"短期","fatherCode":"preservationDate","fatherDesc":"preservationDate","pkId":41,"remarks":"保管期限_短期(30年)"},{"code":"C","ctime":1522121397000,"description":"长期","fatherCode":"preservationDate","fatherDesc":"preservationDate","pkId":42,"remarks":"保管期限_长期(60年)"},{"code":"Y","ctime":1522121727000,"description":"永久","fatherCode":"preservationDate","fatherDesc":"preservationDate","pkId":43,"remarks":"保管期限_永久(无期限)"}]        2837e087-0958-4e47-ac4a-c94441199deb                
95357   分类首页    lzh 1529919645581   6   http://upms.zhangshuzheng.cn:1111   /manage/archivestype/index  http://upms.zhangshuzheng.cn:1111/manage/archivestype/index GET     Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   "/manage/archivestype/index.jsp"    upms:archivestype:read  2837e087-0958-4e47-ac4a-c94441199deb                
95356   个人资料首页  lzh 1529919643316   9   http://upms.zhangshuzheng.cn:1111   /manage/personalData/index  http://upms.zhangshuzheng.cn:1111/manage/personalData/index GET     Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   "/manage/personalData/index.jsp"    upms:personalData:read  2837e087-0958-4e47-ac4a-c94441199deb                
95355   后台首页    lzh 1529919639681   60  http://upms.zhangshuzheng.cn:1111   /manage/index   http://upms.zhangshuzheng.cn:1111/manage/index  GET     Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   "/manage/index.jsp"     2837e087-0958-4e47-ac4a-c94441199deb                
95354   登录  lzh 1529919639478   70  http://upms.zhangshuzheng.cn:1111   /sso/login  http://upms.zhangshuzheng.cn:1111/sso/login POST    {validateCode=[wqby],password=[123456],rememberMe=[false],backurl=[],username=[lzh]}    Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   {"code":1,"data":"http://upms.zhangshuzheng.cn:1111","message":"success"}       2837e087-0958-4e47-ac4a-c94441199deb                
95353   登录      1529919630737   2   http://upms.zhangshuzheng.cn:1111   /sso/login  http://upms.zhangshuzheng.cn:1111/sso/login GET     Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   "/sso/login.jsp"        2837e087-0958-4e47-ac4a-c94441199deb                
95352   退出登录        1529919630594   61  http://upms.zhangshuzheng.cn:1111   /sso/logout http://upms.zhangshuzheng.cn:1111/sso/logout    GET     Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   "redirect:http://upms.zhangshuzheng.cn:1111/manage/index"       6b774f4d-9071-4443-a5b8-042e5e06aecc                
95351   权限列表    admin   1529919600561   62  http://upms.zhangshuzheng.cn:1111   /manage/permission/list http://upms.zhangshuzheng.cn:1111/manage/permission/list    GET sort=permissionId&order=asc&offset=0&limit=10   Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   {"total":148,"rows":[{"ctime":1,"icon":"zmdi zmdi-accounts-list","name":"系统组织管理","orders":1,"permissionId":1,"pid":0,"status":1,"systemId":1,"type":1},{"ctime":2,"name":"系统管理","orders":2,"permissionId":2,"permissionValue":"upms:system:read","pid":1,"status":1,"systemId":1,"type":2,"uri":"/manage/system/index"},{"ctime":3,"name":"组织管理","orders":3,"permissionId":3,"permissionValue":"upms:organization:read","pid":1,"status":1,"systemId":1,"type":2,"uri":"/manage/organization/index"},{"ctime":4,"icon":"zmdi zmdi-accounts","name":"角色用户管理","orders":4,"permissionId":4,"pid":0,"status":1,"systemId":1,"type":1},{"ctime":6,"name":"角色管理","orders":6,"permissionId":5,"permissionValue":"upms:role:read","pid":4,"status":1,"systemId":1,"type":2,"uri":"/manage/role/index"},{"ctime":5,"name":"用户管理","orders":5,"permissionId":6,"permissionValue":"upms:user:read","pid":4,"status":1,"systemId":1,"type":2,"uri":"/manage/user/index"},{"ctime":7,"icon":"zmdi zmdi-key","name":"权限资源管理","orders":7,"permissionId":7,"pid":0,"status":1,"systemId":1,"type":1},{"ctime":12,"icon":"zmdi zmdi-settings","name":"基础数据管理","orders":12,"permissionId":12,"pid":0,"status":1,"systemId":1,"type":1},{"ctime":14,"name":"会话管理","orders":6,"permissionId":14,"permissionValue":"upms:session:read","pid":12,"status":1,"systemId":1,"type":2,"uri":"/manage/session/index"},{"ctime":15,"name":"日志记录","orders":7,"permissionId":15,"permissionValue":"upms:log:read","pid":12,"status":1,"systemId":1,"type":2,"uri":"/manage/log/index"}]}   upms:permission:read    6b774f4d-9071-4443-a5b8-042e5e06aecc                
95350   权限首页    admin   1529919599346   7   http://upms.zhangshuzheng.cn:1111   /manage/permission/index    http://upms.zhangshuzheng.cn:1111/manage/permission/index   GET     Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   "/manage/permission/index.jsp"  upms:permission:read    6b774f4d-9071-4443-a5b8-042e5e06aecc                
95349   新增用户    admin   1529919520811   1   http://upms.zhangshuzheng.cn:1111   /manage/user/create http://upms.zhangshuzheng.cn:1111/manage/user/create    GET     Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   "/manage/user/create.jsp"   upms:user:create    6b774f4d-9071-4443-a5b8-042e5e06aecc                
95348   用户列表    admin   1529919518452   13  http://upms.zhangshuzheng.cn:1111   /manage/user/list   http://upms.zhangshuzheng.cn:1111/manage/user/list  GET sort=userId&order=asc&offset=0&limit=10 Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   {"total":17,"rows":[{"avatar":"http://192.168.10.226:8111/group1/M00/00/01/wKgK4lrUQ4eARjBJAAA8goGXIF0975.jpg","ctime":1,"email":"469741414@qq.com","locked":0,"password":"D13EFAD95FBA09971C3665C1C04F7C46","phone":"12345644444444478","realname":"admin","salt":"2fda5019e1e74fd1b90d14bee18bdc0e","sex":0,"userId":1,"username":"admin"},{"avatar":"/resources/zheng-admin/images/avatar.jpg","ctime":1,"email":"469741414@qq.com","locked":0,"password":"285C9762F5F9046F5893F752DFAF3476","phone":"123456","realname":"测试","salt":"d2d0d03310444ad388a8b290b0fe8564","sex":1,"userId":2,"username":"test"},{"avatar":"/resources/zheng-admin/images/avatar.jpg","ctime":1521633064135,"email":"404036459@qq.com","locked":0,"password":"AA96888C9725907F98EE856070E4714E","phone":"123456","realname":"王让123456","salt":"8762ea7bce62434d91bcc826a3b68fbb","sex":1,"userId":3,"username":"wr"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/06/wKgK4lrhoDGAa0tLAAZ6_qAk5xs261.jpg","ctime":1523340217954,"email":"2787718835@qq.com","locked":0,"password":"5F8829FF056E2DD4E63FE9A08930821B","phone":"123456","realname":"李四","salt":"2292a2f818f44e5a9642248c8fbf2ebb","sex":1,"userId":4,"username":"lisi"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/06/wKgK4lrhoFWAJMw4AAS_HAFOT78808.jpg","ctime":1523340356218,"email":"123456","locked":0,"password":"B6BB728594211683056D632583C60BD0","phone":"123456","realname":"王五","salt":"25fb83ab85124dedbabb48a16bf0f00d","sex":1,"userId":5,"username":"wangwu"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/06/wKgK4lrhoG-ASVLiAAMTwHdlt4g707.jpg","ctime":1523341083756,"email":"123456","locked":0,"password":"B2F78048FCE97D37A5DD214EDE5A2DB7","phone"
:"12345678","realname":"老六","salt":"8264132985a041d8bb806e15770d6900","sex":1,"userId":6,"username":"laoliu"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/01/wKgK4lrWoViAFJeBAAA4c3mXMS4922.png","ctime":1523427110963,"email":"2787718835@qq.com","locked":0,"password":"D13EFAD95FBA09971C3665C1C04F7C46","phone":"12345678","realname":"赵四","salt":"2fda5019e1e74fd1b90d14bee18bdc0e","sex":0,"userId":15,"username":"zhaosi"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/06/wKgK4lrhoIWAZsgZAAW6kkGANi4261.jpg","ctime":1523431468775,"email":"123456@11.com","locked":0,"password":"000B22923056B62D23A57B3F567078B5","phone":"123456","realname":"著录喽啰","salt":"3e49c877739345cc9898a6c74f3f1644","sex":1,"userId":16,"username":"sw2"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/01/wKgK4lrPIhmAXj8CAACGQ6XrTPc212.jpg","ctime":1523436240417,"locked":0,"password":"00278F41BD2A7DDD940D80A6EBB877F8","realname":"归档喽啰","salt":"3f67221d0d5a4dc9a9dc17e9d91dca6a","sex":1,"userId":18,"username":"sw4"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/01/wKgK4lrQIoqALeMUAAIBtaolgQo231.jpg","ctime":1523439432590,"locked":0,"password":"EBB6A6283BECAA763484B45B5ECEF539","realname":"lzh","salt":"fbf6f778d40a4d56a9b53fdc7045cdb1","sex":1,"userId":19,"username":"lzh"}]} upms:user:read  6b774f4d-9071-4443-a5b8-042e5e06aecc                
95347   用户首页    admin   1529919517553   0   http://upms.zhangshuzheng.cn:1111   /manage/user/index  http://upms.zhangshuzheng.cn:1111/manage/user/index GET     Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   "/manage/user/index.jsp"    upms:user:read  6b774f4d-9071-4443-a5b8-042e5e06aecc                
95346   用户列表    admin   1529919495638   26  http://upms.zhangshuzheng.cn:1111   /manage/user/list   http://upms.zhangshuzheng.cn:1111/manage/user/list  GET sort=userId&order=asc&offset=0&limit=10 Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   {"total":17,"rows":[{"avatar":"http://192.168.10.226:8111/group1/M00/00/01/wKgK4lrUQ4eARjBJAAA8goGXIF0975.jpg","ctime":1,"email":"469741414@qq.com","locked":0,"password":"D13EFAD95FBA09971C3665C1C04F7C46","phone":"12345644444444478","realname":"admin","salt":"2fda5019e1e74fd1b90d14bee18bdc0e","sex":0,"userId":1,"username":"admin"},{"avatar":"/resources/zheng-admin/images/avatar.jpg","ctime":1,"email":"469741414@qq.com","locked":0,"password":"285C9762F5F9046F5893F752DFAF3476","phone":"123456","realname":"测试","salt":"d2d0d03310444ad388a8b290b0fe8564","sex":1,"userId":2,"username":"test"},{"avatar":"/resources/zheng-admin/images/avatar.jpg","ctime":1521633064135,"email":"404036459@qq.com","locked":0,"password":"AA96888C9725907F98EE856070E4714E","phone":"123456","realname":"王让123456","salt":"8762ea7bce62434d91bcc826a3b68fbb","sex":1,"userId":3,"username":"wr"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/06/wKgK4lrhoDGAa0tLAAZ6_qAk5xs261.jpg","ctime":1523340217954,"email":"2787718835@qq.com","locked":0,"password":"5F8829FF056E2DD4E63FE9A08930821B","phone":"123456","realname":"李四","salt":"2292a2f818f44e5a9642248c8fbf2ebb","sex":1,"userId":4,"username":"lisi"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/06/wKgK4lrhoFWAJMw4AAS_HAFOT78808.jpg","ctime":1523340356218,"email":"123456","locked":0,"password":"B6BB728594211683056D632583C60BD0","phone":"123456","realname":"王五","salt":"25fb83ab85124dedbabb48a16bf0f00d","sex":1,"userId":5,"username":"wangwu"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/06/wKgK4lrhoG-ASVLiAAMTwHdlt4g707.jpg","ctime":1523341083756,"email":"123456","locked":0,"password":"B2F78048FCE97D37A5DD214EDE5A2DB7","phone"
:"12345678","realname":"老六","salt":"8264132985a041d8bb806e15770d6900","sex":1,"userId":6,"username":"laoliu"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/01/wKgK4lrWoViAFJeBAAA4c3mXMS4922.png","ctime":1523427110963,"email":"2787718835@qq.com","locked":0,"password":"D13EFAD95FBA09971C3665C1C04F7C46","phone":"12345678","realname":"赵四","salt":"2fda5019e1e74fd1b90d14bee18bdc0e","sex":0,"userId":15,"username":"zhaosi"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/06/wKgK4lrhoIWAZsgZAAW6kkGANi4261.jpg","ctime":1523431468775,"email":"123456@11.com","locked":0,"password":"000B22923056B62D23A57B3F567078B5","phone":"123456","realname":"著录喽啰","salt":"3e49c877739345cc9898a6c74f3f1644","sex":1,"userId":16,"username":"sw2"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/01/wKgK4lrPIhmAXj8CAACGQ6XrTPc212.jpg","ctime":1523436240417,"locked":0,"password":"00278F41BD2A7DDD940D80A6EBB877F8","realname":"归档喽啰","salt":"3f67221d0d5a4dc9a9dc17e9d91dca6a","sex":1,"userId":18,"username":"sw4"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/01/wKgK4lrQIoqALeMUAAIBtaolgQo231.jpg","ctime":1523439432590,"locked":0,"password":"EBB6A6283BECAA763484B45B5ECEF539","realname":"lzh","salt":"fbf6f778d40a4d56a9b53fdc7045cdb1","sex":1,"userId":19,"username":"lzh"}]} upms:user:read  6b774f4d-9071-4443-a5b8-042e5e06aecc                
95345   用户首页    admin   1529919494212   8   http://upms.zhangshuzheng.cn:1111   /manage/user/index  http://upms.zhangshuzheng.cn:1111/manage/user/index GET     Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   "/manage/user/index.jsp"    upms:user:read  6b774f4d-9071-4443-a5b8-042e5e06aecc                
95344   系统列表    admin   1529919394684   73  http://upms.zhangshuzheng.cn:1111   /manage/system/list http://upms.zhangshuzheng.cn:1111/manage/system/list    GET sort=systemId&order=asc&offset=0&limit=10   Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   {"total":2,"rows":[{"banner":"http://192.168.10.226:8111/group1/M00/00/06/wKgK4lrYRkyAbNBDAABZE6_qhVI301.png","basepath":"http://upms.zhangshuzheng.cn:1111","ctime":1,"description":"用户权限管理系统(RBAC细粒度用户权限、统一后台、单点登录、会话管理)","icon":"zmdi zmdi-shield-security","name":"zheng-upms-server","orders":1,"status":1,"systemId":1,"theme":"#29a176","title":"权限管理系统"},{"banner":"/resources/zheng-admin/images/zheng-cms.png","basepath":"http://cms.zhangshuzheng.cn:2222","ctime":2,"description":"内容管理系统(门户、博客、论坛、问答等)","icon":"zmdi zmdi-wikipedia","name":"zheng-cms-admin","orders":2,"status":1,"systemId":2,"theme":"#455EC5","title":"内容管理系统"}]}    upms:system:read    6b774f4d-9071-4443-a5b8-042e5e06aecc                
95343   系统首页    admin   1529919392112   16  http://upms.zhangshuzheng.cn:1111   /manage/system/index    http://upms.zhangshuzheng.cn:1111/manage/system/index   GET     Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   "/manage/system/index.jsp"  upms:system:read    6b774f4d-9071-4443-a5b8-042e5e06aecc                
95342   后台首页    admin   1529919379219   152 http://upms.zhangshuzheng.cn:1111   /manage/index   http://upms.zhangshuzheng.cn:1111/manage/index  GET     Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   "/manage/index.jsp"     6b774f4d-9071-4443-a5b8-042e5e06aecc                
95341   登录  admin   1529919377049   1483    http://upms.zhangshuzheng.cn:1111   /sso/login  http://upms.zhangshuzheng.cn:1111/sso/login POST    {validateCode=[q48x],password=[123456],rememberMe=[false],backurl=[],username=[admin]}  Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   {"code":1,"data":"http://upms.zhangshuzheng.cn:1111","message":"success"}       6b774f4d-9071-4443-a5b8-042e5e06aecc                
95340   登录      1529919354643   19  http://upms.zhangshuzheng.cn:1111   /sso/login  http://upms.zhangshuzheng.cn:1111/sso/login GET     Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36   127.0.0.1   "/sso/login.jsp"        6b774f4d-9071-4443-a5b8-042e5e06aecc                

1日志清洗

package cn.itcast.bigdata.hive.mr.pre;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import cn.itcast.bigdata.hive.mrbean.WebLogBean;
import cn.itcast.bigdata.hive.mrbean.WebLogParser;

/**
 * Log-cleaning job: parses every raw log line and keeps only the records that
 * the parser marks as valid.
 *
 * <p>Usage: {@code WeblogPreValid <input path> <output path>}
 */
public class WeblogPreValid {

    /**
     * Parses each input line with {@code WebLogParser.parser} and emits the
     * resulting bean as the output key when {@code isValid()} is true.
     */
    static class WeblogPreProcessMapper extends Mapper<LongWritable, Text, WebLogBean, NullWritable> {

        // No setup() override: this mapper needs no per-task state, and the
        // inherited Mapper.setup() does nothing.

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            WebLogBean webLogBean = WebLogParser.parser(value.toString());
            // Records flagged invalid by the parser are not written.
            if (webLogBean.isValid()) {
                context.write(webLogBean, NullWritable.get());
            }
        }

    }

    /**
     * Writes the key of every group it receives exactly once and ignores the
     * values. Which records fall into the same group is decided by how
     * WebLogBean compares itself, which is defined elsewhere.
     */
    static class WeblogPreProcessReducer extends Reducer<WebLogBean, NullWritable, WebLogBean, NullWritable> {

        @Override
        protected void reduce(WebLogBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }

    }

    /**
     * Configures and runs the cleaning job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: WeblogPreValid <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(WeblogPreValid.class);

        job.setMapperClass(WeblogPreProcessMapper.class);
        job.setReducerClass(WeblogPreProcessReducer.class);

        job.setMapOutputKeyClass(WebLogBean.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setOutputKeyClass(WebLogBean.class);
        job.setOutputValueClass(NullWritable.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Propagate failure to the caller (e.g. a scheduler) through the exit
        // status; on success main still returns normally as before.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
    }

}

2统计pv

package cn.itcast.bigdata.hive.mr.pre;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import cn.itcast.bigdata.hive.mrbean.WebLogBean;
import cn.itcast.bigdata.hive.mrbean.WebLogParser;


/**
 * Map-only job that re-parses log records and keeps the ones that are still
 * valid after the static-resource filter has run.
 *
 * <p>According to the original author's notes the pipeline processes the raw
 * log, filters out the real page-view requests, converts the time format,
 * fills defaults for missing fields and marks records valid/invalid; that
 * work happens inside WebLogParser, which is defined elsewhere.
 *
 * <p>Usage: {@code WeblogPreProcess [<input path> <output path>]}. When fewer
 * than two arguments are given the original hard-coded paths are used.
 */
public class WeblogPreProcess {

    /** Input path used when none is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH = "c:/weblog/zhengoutput";

    /** Output path used when none is supplied on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "c:/weblog/zhengoutputPre";

    /**
     * Parses each line, applies the static-resource filter and writes the
     * bean's {@code toString()} form for records that remain valid.
     */
    static class WeblogPreProcessMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
        // Site URL set handed to WebLogParser.filtStaticResource.
        Set<String> pages = new HashSet<String>();
        // Reused output key.
        Text k = new Text();
        NullWritable v = NullWritable.get();

        /**
         * Fills the URL set once per task. The entries are hard-coded here;
         * how the set is interpreted (pages to keep versus pages to drop) is
         * decided by WebLogParser.filtStaticResource - TODO confirm there.
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            pages.add("/about");
            pages.add("/black-ip-list/");
            pages.add("/cassandra-clustor/");
            pages.add("/finance-rhive-repurchase/");
            pages.add("/hadoop-family-roadmap/");
            pages.add("/hadoop-hive-intro/");
            pages.add("/hadoop-zookeeper-intro/");
            pages.add("/hadoop-mahout-roadmap/");
        }

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            WebLogBean webLogBean = WebLogParser.parser(value.toString());
            // Filter js / image / css and other static-resource requests.
            WebLogParser.filtStaticResource(webLogBean, pages);
            if (!webLogBean.isValid()) {
                return;
            }
            k.set(webLogBean.toString());
            context.write(k, v);
        }

    }

    /**
     * Configures and runs the map-only job.
     *
     * @param args optional; {@code args[0]} is the input path and
     *             {@code args[1]} the output path
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        // Command-line paths win; otherwise keep the previous fixed locations
        // so existing invocations without arguments behave as before.
        boolean pathsGiven = args.length >= 2;
        String inputPath = pathsGiven ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = pathsGiven ? args[1] : DEFAULT_OUTPUT_PATH;

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(WeblogPreProcess.class);

        job.setMapperClass(WeblogPreProcessMapper.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // No reducers: mapper output is written directly.
        job.setNumReduceTasks(0);

        // Propagate failure through the exit status; on success main still
        // returns normally as before.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
    }

}

3统计visit模型

package cn.itcast.bigdata.hive.mr;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import cn.itcast.bigdata.hive.mrbean.PageViewsBean;
import cn.itcast.bigdata.hive.mrbean.VisitBean;


/**
 * Derives the visit model from the page-views model output.
 *
 * <p>One output record is produced per session, laid out as:
 * sessionid  start-time   out-time   start-page   out-page   pagecounts  ......
 *
 * @author
 *
 */
public class ClickStreamVisit {

    /**
     * Emits every page-view record keyed by its session, so that all page views of one
     * session reach the same reducer call.
     */
    static class ClickStreamVisitMapper extends Mapper<LongWritable, Text, Text, PageViewsBean> {

        // Reused across map() calls; each call overwrites the contents before writing.
        PageViewsBean pvBean = new PageViewsBean();
        Text k = new Text();

        /**
         * Splits one input line on the {@code \001} separator and emits it as a
         * {@link PageViewsBean} keyed by session.
         *
         * @param key     input key supplied by the framework (unused)
         * @param value   one line of page-view data
         * @param context used to emit the (session, page view) pair
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

            String line = value.toString();
            String[] fields = line.split("\001");
            // Argument order of PageViewsBean.set, per the original author's note (PageViewsBean
            // itself is not visible here - verify against its declaration):
            //   String session, String remote_addr, String useragent,
            //   String timestr, String request, int step, String staylong,
            //   String referal, String bytes_send, String status
            // Sample input record (the field separators are not visible in this rendering):
            //true95364登录15299196702052http://upms.zhangshuzheng.cn:1111/sso/loginhttp://upms.zhangshuzheng.cn:1111/sso/loginGETMozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36127.0.0.1f1124085-8fdb-45e8-9a01-716153d24b11
            pvBean.set(fields[15], fields[12], fields[11],
                    fields[4], fields[7], Integer.valueOf(fields[18]), fields[5],
                    fields[17], "", "200");
            k.set(pvBean.getSession());
            context.write(k, pvBean);

        }

    }

    /**
     * Collapses all page views of one session into a single {@link VisitBean} describing the
     * first and last page view of that session.
     */
    static class ClickStreamVisitReducer extends Reducer<Text, PageViewsBean, NullWritable, VisitBean> {

        /**
         * Orders the session's page views by step and writes one visit record built from the
         * first and last of them.
         *
         * @param session the session id shared by all values
         * @param pvBeans the page views belonging to the session
         * @param context used to emit the visit record
         * @throws IOException          if a page view cannot be copied or the output cannot be written
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text session, Iterable<PageViewsBean> pvBeans, Context context) throws IOException, InterruptedException {

            // Hadoop reuses the value object while iterating, so each page view is copied into
            // a fresh bean before being stored in the list.
            ArrayList<PageViewsBean> pvBeansList = new ArrayList<PageViewsBean>();
            for (PageViewsBean pvBean : pvBeans) {
                PageViewsBean bean = new PageViewsBean();
                try {
                    BeanUtils.copyProperties(bean, pvBean);
                } catch (ReflectiveOperationException e) {
                    // Dropping the record silently would yield a wrong page count (or an empty
                    // list and a misleading IndexOutOfBoundsException below), so fail loudly
                    // and keep the root cause.
                    throw new IOException("Failed to copy page view for session " + session, e);
                }
                pvBeansList.add(bean);
            }

            // Sort the page views by step. Integer.compare returns 0 for equal steps, which the
            // Comparator contract requires; a comparator that never returns 0 can make the
            // sort throw IllegalArgumentException.
            Collections.sort(pvBeansList, new Comparator<PageViewsBean>() {

                @Override
                public int compare(PageViewsBean o1, PageViewsBean o2) {
                    return Integer.compare(o1.getStep(), o2.getStep());
                }
            });

            PageViewsBean firstView = pvBeansList.get(0);
            PageViewsBean lastView = pvBeansList.get(pvBeansList.size() - 1);

            // Build the visit record from the first and last page view of this visit.
            VisitBean visitBean = new VisitBean();
            // First record of the visit
            visitBean.setInPage(firstView.getRequest());
            visitBean.setInTime(firstView.getTimestr());
            // Last record of the visit
            visitBean.setOutPage(lastView.getRequest());
            visitBean.setOutTime(lastView.getTimestr());
            // Number of pages viewed during the visit
            visitBean.setPageVisits(pvBeansList.size());
            // Visitor's ip
            visitBean.setRemote_addr(firstView.getRemote_addr());
            // Referal of this visit
            visitBean.setReferal(firstView.getReferal());
            visitBean.setSession(session.toString());

            context.write(NullWritable.get(), visitBean);

        }

    }

    /**
     * Configures and runs the visit-model job, then exits with status 0 on success or 1 on
     * failure. Exits with status 2 after printing a usage message when the two required
     * paths are not supplied.
     *
     * @param args {@code <input path> <output path>}
     * @throws Exception if the job cannot be configured, submitted or waited for
     */
    public static void main(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: ClickStreamVisit <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(ClickStreamVisit.class);

        job.setMapperClass(ClickStreamVisitMapper.class);
        job.setReducerClass(ClickStreamVisitReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(PageViewsBean.class);

        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(VisitBean.class);


        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean res = job.waitForCompletion(true);
        System.exit(res?0:1);

    }

}
  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值