总体思路:
使用 Flume 将服务器上的日志采集到 Hadoop 上,然后使用 MapReduce 程序完成数据清洗,并统计 PV 和 visit 模型,最后使用 Azkaban 定时执行这些程序。
用户的每一次访问(visit)根据 session 来判断。
以下代码已经本人亲自测试,可以使用。
原始日志字段说明(按顺序):id,方法中文说明,登录人name(未登录时为空),登录时间(毫秒时间戳),操作耗时(毫秒),请求路径1,请求路径2,请求全路径,请求方式(GET/POST),请求参数(可能为空),浏览器信息,用户ip地址,请求页面(或接口返回结果),权限值(可能为空),用户session
原始日志如下:
95367 后台首页 sw2 1529919971466 21 http://upms.zhangshuzheng.cn:1111 /manage/index http://upms.zhangshuzheng.cn:1111/manage/index GET Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 "/manage/index.jsp" 548a66a9-e89c-401b-b1f0-503357ce72ae
95366 登录 sw2 1529919971322 50 http://upms.zhangshuzheng.cn:1111 /sso/login http://upms.zhangshuzheng.cn:1111/sso/login POST {validateCode=[2GRQ],password=[12345],rememberMe=[false],backurl=[],username=[sw2]} Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 {"code":1,"data":"http://upms.zhangshuzheng.cn:1111","message":"success"} 548a66a9-e89c-401b-b1f0-503357ce72ae
95365 登录 1529919964249 0 http://upms.zhangshuzheng.cn:1111 /sso/login http://upms.zhangshuzheng.cn:1111/sso/login POST {validateCode=[FDEY],password=[12345],rememberMe=[false],backurl=[],username=[sw2]} Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 {"code":10107,"data":"请更换验证码!","message":"ValidateCode error"} 548a66a9-e89c-401b-b1f0-503357ce72ae
95364 登录 1529919670205 2 http://upms.zhangshuzheng.cn:1111 /sso/login http://upms.zhangshuzheng.cn:1111/sso/login GET Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 "/sso/login.jsp" f1124085-8fdb-45e8-9a01-716153d24b11
95363 退出登录 1529919670085 47 http://upms.zhangshuzheng.cn:1111 /sso/logout http://upms.zhangshuzheng.cn:1111/sso/logout GET Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 "redirect:http://upms.zhangshuzheng.cn:1111/manage/index" 2837e087-0958-4e47-ac4a-c94441199deb
95362 查询字典 lzh 1529919651268 19 http://upms.zhangshuzheng.cn:1111 /manage/dictionary/select/sys http://upms.zhangshuzheng.cn:1111/manage/dictionary/select/sys GET sort=pkId&order=asc&offset=0&limit=50 Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 {"total":13,"rows":[{"code":"sys","ctime":1521769310000,"description":"系统","fatherCode":"sys","fatherDesc":"系统","pkId":3},{"code":"mg","ctime":1522120361000,"description":"密级","fatherCode":"sys","fatherDesc":"sys","pkId":33,"remarks":"档案秘密级别"},{"code":"preservationDate","ctime":1522121226000,"description":"保存期限","fatherCode":"sys","fatherDesc":"sys","pkId":40,"remarks":"设置文档的保存期限"},{"code":"tradition","ctime":1522132680000,"description":"传统归档","fatherCode":"sys","fatherDesc":"sys","pkId":44,"remarks":"传统归档"},{"code":"comArticle","ctime":1522134595000,"description":"来文","fatherCode":"sys","fatherDesc":"sys","pkId":51,"remarks":"简化整理--来文"},{"code":"sendArticle","ctime":1522135517000,"description":"发文","fatherCode":"sys","fatherDesc":"sys","pkId":56,"remarks":"简化整理--发文"},{"code":"innerArticle","ctime":1522137766000,"description":"内部文件","fatherCode":"sys","fatherDesc":"sys","pkId":63,"remarks":"简化整理--内部文件"},{"code":"singleArchive","ctime":1522139048000,"description":"单件","fatherCode":"sys","fatherDesc":"sys","pkId":71,"remarks":"简化管理--单件"},{"code":"separator","ctime":1522216114000,"description":"分隔符","fatherCode":"sys","fatherDesc":"sys","pkId":78,"remarks":"特殊字符符号"},{"code":"carrierType","ctime":1522380386000,"description":"载体类型","fatherCode":"sys","fatherDesc":"sys","pkId":94,"remarks":"档案的载体"},{"code":"archiveSource","ctime":1522381316000,"description":"档案来源","fatherCode":"sys","fatherDesc":"sys","pkId":98,"remarks":"档案的出处"},{"code":"abbreviation","ctime":1523347840000,"description":"门类简称","fatherCode":"sys","fatherDesc":"sys","pkId":109,"remarks":"门类号的简称"},{"code":"activitiCode","ctime":1524106881000,"description":"工作流定义","fatherCode":"sys","fathe
rDesc":"sys","pkId":120,"remarks":"应用于本项目的所有工作流"}]} upms:dictionary:select 2837e087-0958-4e47-ac4a-c94441199deb
95361 查询字典不分页 lzh 1529919651241 16 http://upms.zhangshuzheng.cn:1111 /manage/dictionary/selectNoPagination/sys http://upms.zhangshuzheng.cn:1111/manage/dictionary/selectNoPagination/sys GET Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 {"total":13,"rows":[{"code":"mg","ctime":1522120361000,"description":"密级","fatherCode":"sys","fatherDesc":"sys","pkId":33,"remarks":"档案秘密级别"},{"code":"preservationDate","ctime":1522121226000,"description":"保存期限","fatherCode":"sys","fatherDesc":"sys","pkId":40,"remarks":"设置文档的保存期限"},{"code":"tradition","ctime":1522132680000,"description":"传统归档","fatherCode":"sys","fatherDesc":"sys","pkId":44,"remarks":"传统归档"},{"code":"comArticle","ctime":1522134595000,"description":"来文","fatherCode":"sys","fatherDesc":"sys","pkId":51,"remarks":"简化整理--来文"},{"code":"sendArticle","ctime":1522135517000,"description":"发文","fatherCode":"sys","fatherDesc":"sys","pkId":56,"remarks":"简化整理--发文"},{"code":"innerArticle","ctime":1522137766000,"description":"内部文件","fatherCode":"sys","fatherDesc":"sys","pkId":63,"remarks":"简化整理--内部文件"},{"code":"carrierType","ctime":1522380386000,"description":"载体类型","fatherCode":"sys","fatherDesc":"sys","pkId":94,"remarks":"档案的载体"},{"code":"separator","ctime":1522216114000,"description":"分隔符","fatherCode":"sys","fatherDesc":"sys","pkId":78,"remarks":"特殊字符符号"},{"code":"activitiCode","ctime":1524106881000,"description":"工作流定义","fatherCode":"sys","fatherDesc":"sys","pkId":120,"remarks":"应用于本项目的所有工作流"},{"code":"sys","ctime":1521769310000,"description":"系统","fatherCode":"sys","fatherDesc":"系统","pkId":3},{"code":"abbreviation","ctime":1523347840000,"description":"门类简称","fatherCode":"sys","fatherDesc":"sys","pkId":109,"remarks":"门类号的简称"},{"code":"singleArchive","ctime":1522139048000,"description":"单件","fatherCode":"sys","fatherDesc":"sys","pkId":71,"remarks":"简化管理--单件"},{"code":"archiveSource","ctime":1522381316000,"description":"档案来源","fatherCode":"sys","fatherDe
sc":"sys","pkId":98,"remarks":"档案的出处"}]} upms:dictionary:selectNoPagination 2837e087-0958-4e47-ac4a-c94441199deb
95360 字典首页 lzh 1529919650618 10 http://upms.zhangshuzheng.cn:1111 /manage/dictionary/index http://upms.zhangshuzheng.cn:1111/manage/dictionary/index GET Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 "/manage/dictionary/index.jsp" upms:dictionary:read 2837e087-0958-4e47-ac4a-c94441199deb
95359 全宗列表 lzh 1529919646915 64 http://upms.zhangshuzheng.cn:1111 /manage/fonds/list http://upms.zhangshuzheng.cn:1111/manage/fonds/list POST {} Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 [{"ctime":1524758400000,"fondsId":13,"fondsName":"占地方","fondsNum":"000","mtime":1525190400000,"str1":"0","str2":"1"},{"ctime":1523462400000,"fondsId":1,"fondsName":"ZYP测试","fondsNum":"000","mtime":1525190400000,"str1":"0","str2":"0"},{"ctime":1525190400000,"fondsId":19,"fondsName":"lzh测试2","fondsNum":"001","str1":"0","str2":"1"},{"ctime":1523462400000,"fondsId":4,"fondsName":"301","fondsNum":"002","mtime":1524758400000,"str1":"0","str2":"1"},{"ctime":1523462400000,"fondsId":2,"fondsName":"ZYP测试","fondsNum":"003","mtime":1524758400000,"str1":"0","str2":"1"},{"ctime":1523462400000,"fondsId":5,"fondsName":"ZXY测试","fondsNum":"004","str1":"0","str2":"0"},{"ctime":1523462400000,"fondsId":6,"fondsName":"WXL测试","fondsNum":"005","str1":"0","str2":"0"},{"ctime":1523462400000,"fondsId":7,"fondsName":"SW测试","fondsNum":"006","str1":"0","str2":"1"},{"ctime":1523462400000,"fondsId":8,"fondsName":"LZH测试2","fondsNum":"007","str1":"0","str2":"0"},{"ctime":1524758400000,"fondsId":14,"fondsName":"1","fondsNum":"008","mtime":1524758400000,"str1":"0","str2":"1"},{"ctime":1524758400000,"fondsId":16,"fondsName":"123123","fondsNum":"011","str1":"0","str2":"1"},{"ctime":1524758400000,"fondsId":15,"fondsName":"2","fondsNum":"012","mtime":1524758400000,"str1":"0","str2":"1"},{"ctime":1526313600000,"fondsId":21,"fondsName":"测试99","fondsNum":"099","str1":"0"},{"ctime":1525190400000,"fondsId":20,"fondsName":"lzh测试","fondsNum":"60","mtime":1525190400000,"str1":"0","str2":"1"},{"ctime":1523894400000,"fondsId":9,"fondsName":"innoking","fondsNum":"YNJY","str1":"0","str2":"1"}] 2837e087-0958-4e47-ac4a-c94441199deb
95358 查询保管年限 lzh 1529919646747 69 http://upms.zhangshuzheng.cn:1111 /manage/scope/preservationDate http://upms.zhangshuzheng.cn:1111/manage/scope/preservationDate GET code=preservationDate Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 [{"code":"D","ctime":1522121323000,"description":"短期","fatherCode":"preservationDate","fatherDesc":"preservationDate","pkId":41,"remarks":"保管期限_短期(30年)"},{"code":"C","ctime":1522121397000,"description":"长期","fatherCode":"preservationDate","fatherDesc":"preservationDate","pkId":42,"remarks":"保管期限_长期(60年)"},{"code":"Y","ctime":1522121727000,"description":"永久","fatherCode":"preservationDate","fatherDesc":"preservationDate","pkId":43,"remarks":"保管期限_永久(无期限)"}] 2837e087-0958-4e47-ac4a-c94441199deb
95357 分类首页 lzh 1529919645581 6 http://upms.zhangshuzheng.cn:1111 /manage/archivestype/index http://upms.zhangshuzheng.cn:1111/manage/archivestype/index GET Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 "/manage/archivestype/index.jsp" upms:archivestype:read 2837e087-0958-4e47-ac4a-c94441199deb
95356 个人资料首页 lzh 1529919643316 9 http://upms.zhangshuzheng.cn:1111 /manage/personalData/index http://upms.zhangshuzheng.cn:1111/manage/personalData/index GET Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 "/manage/personalData/index.jsp" upms:personalData:read 2837e087-0958-4e47-ac4a-c94441199deb
95355 后台首页 lzh 1529919639681 60 http://upms.zhangshuzheng.cn:1111 /manage/index http://upms.zhangshuzheng.cn:1111/manage/index GET Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 "/manage/index.jsp" 2837e087-0958-4e47-ac4a-c94441199deb
95354 登录 lzh 1529919639478 70 http://upms.zhangshuzheng.cn:1111 /sso/login http://upms.zhangshuzheng.cn:1111/sso/login POST {validateCode=[wqby],password=[123456],rememberMe=[false],backurl=[],username=[lzh]} Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 {"code":1,"data":"http://upms.zhangshuzheng.cn:1111","message":"success"} 2837e087-0958-4e47-ac4a-c94441199deb
95353 登录 1529919630737 2 http://upms.zhangshuzheng.cn:1111 /sso/login http://upms.zhangshuzheng.cn:1111/sso/login GET Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 "/sso/login.jsp" 2837e087-0958-4e47-ac4a-c94441199deb
95352 退出登录 1529919630594 61 http://upms.zhangshuzheng.cn:1111 /sso/logout http://upms.zhangshuzheng.cn:1111/sso/logout GET Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 "redirect:http://upms.zhangshuzheng.cn:1111/manage/index" 6b774f4d-9071-4443-a5b8-042e5e06aecc
95351 权限列表 admin 1529919600561 62 http://upms.zhangshuzheng.cn:1111 /manage/permission/list http://upms.zhangshuzheng.cn:1111/manage/permission/list GET sort=permissionId&order=asc&offset=0&limit=10 Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 {"total":148,"rows":[{"ctime":1,"icon":"zmdi zmdi-accounts-list","name":"系统组织管理","orders":1,"permissionId":1,"pid":0,"status":1,"systemId":1,"type":1},{"ctime":2,"name":"系统管理","orders":2,"permissionId":2,"permissionValue":"upms:system:read","pid":1,"status":1,"systemId":1,"type":2,"uri":"/manage/system/index"},{"ctime":3,"name":"组织管理","orders":3,"permissionId":3,"permissionValue":"upms:organization:read","pid":1,"status":1,"systemId":1,"type":2,"uri":"/manage/organization/index"},{"ctime":4,"icon":"zmdi zmdi-accounts","name":"角色用户管理","orders":4,"permissionId":4,"pid":0,"status":1,"systemId":1,"type":1},{"ctime":6,"name":"角色管理","orders":6,"permissionId":5,"permissionValue":"upms:role:read","pid":4,"status":1,"systemId":1,"type":2,"uri":"/manage/role/index"},{"ctime":5,"name":"用户管理","orders":5,"permissionId":6,"permissionValue":"upms:user:read","pid":4,"status":1,"systemId":1,"type":2,"uri":"/manage/user/index"},{"ctime":7,"icon":"zmdi zmdi-key","name":"权限资源管理","orders":7,"permissionId":7,"pid":0,"status":1,"systemId":1,"type":1},{"ctime":12,"icon":"zmdi zmdi-settings","name":"基础数据管理","orders":12,"permissionId":12,"pid":0,"status":1,"systemId":1,"type":1},{"ctime":14,"name":"会话管理","orders":6,"permissionId":14,"permissionValue":"upms:session:read","pid":12,"status":1,"systemId":1,"type":2,"uri":"/manage/session/index"},{"ctime":15,"name":"日志记录","orders":7,"permissionId":15,"permissionValue":"upms:log:read","pid":12,"status":1,"systemId":1,"type":2,"uri":"/manage/log/index"}]} upms:permission:read 6b774f4d-9071-4443-a5b8-042e5e06aecc
95350 权限首页 admin 1529919599346 7 http://upms.zhangshuzheng.cn:1111 /manage/permission/index http://upms.zhangshuzheng.cn:1111/manage/permission/index GET Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 "/manage/permission/index.jsp" upms:permission:read 6b774f4d-9071-4443-a5b8-042e5e06aecc
95349 新增用户 admin 1529919520811 1 http://upms.zhangshuzheng.cn:1111 /manage/user/create http://upms.zhangshuzheng.cn:1111/manage/user/create GET Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 "/manage/user/create.jsp" upms:user:create 6b774f4d-9071-4443-a5b8-042e5e06aecc
95348 用户列表 admin 1529919518452 13 http://upms.zhangshuzheng.cn:1111 /manage/user/list http://upms.zhangshuzheng.cn:1111/manage/user/list GET sort=userId&order=asc&offset=0&limit=10 Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 {"total":17,"rows":[{"avatar":"http://192.168.10.226:8111/group1/M00/00/01/wKgK4lrUQ4eARjBJAAA8goGXIF0975.jpg","ctime":1,"email":"469741414@qq.com","locked":0,"password":"D13EFAD95FBA09971C3665C1C04F7C46","phone":"12345644444444478","realname":"admin","salt":"2fda5019e1e74fd1b90d14bee18bdc0e","sex":0,"userId":1,"username":"admin"},{"avatar":"/resources/zheng-admin/images/avatar.jpg","ctime":1,"email":"469741414@qq.com","locked":0,"password":"285C9762F5F9046F5893F752DFAF3476","phone":"123456","realname":"测试","salt":"d2d0d03310444ad388a8b290b0fe8564","sex":1,"userId":2,"username":"test"},{"avatar":"/resources/zheng-admin/images/avatar.jpg","ctime":1521633064135,"email":"404036459@qq.com","locked":0,"password":"AA96888C9725907F98EE856070E4714E","phone":"123456","realname":"王让123456","salt":"8762ea7bce62434d91bcc826a3b68fbb","sex":1,"userId":3,"username":"wr"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/06/wKgK4lrhoDGAa0tLAAZ6_qAk5xs261.jpg","ctime":1523340217954,"email":"2787718835@qq.com","locked":0,"password":"5F8829FF056E2DD4E63FE9A08930821B","phone":"123456","realname":"李四","salt":"2292a2f818f44e5a9642248c8fbf2ebb","sex":1,"userId":4,"username":"lisi"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/06/wKgK4lrhoFWAJMw4AAS_HAFOT78808.jpg","ctime":1523340356218,"email":"123456","locked":0,"password":"B6BB728594211683056D632583C60BD0","phone":"123456","realname":"王五","salt":"25fb83ab85124dedbabb48a16bf0f00d","sex":1,"userId":5,"username":"wangwu"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/06/wKgK4lrhoG-ASVLiAAMTwHdlt4g707.jpg","ctime":1523341083756,"email":"123456","locked":0,"password":"B2F78048FCE97D37A5DD214EDE5A2DB7","phone":"12345678","realna
me":"老六","salt":"8264132985a041d8bb806e15770d6900","sex":1,"userId":6,"username":"laoliu"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/01/wKgK4lrWoViAFJeBAAA4c3mXMS4922.png","ctime":1523427110963,"email":"2787718835@qq.com","locked":0,"password":"D13EFAD95FBA09971C3665C1C04F7C46","phone":"12345678","realname":"赵四","salt":"2fda5019e1e74fd1b90d14bee18bdc0e","sex":0,"userId":15,"username":"zhaosi"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/06/wKgK4lrhoIWAZsgZAAW6kkGANi4261.jpg","ctime":1523431468775,"email":"123456@11.com","locked":0,"password":"000B22923056B62D23A57B3F567078B5","phone":"123456","realname":"著录喽啰","salt":"3e49c877739345cc9898a6c74f3f1644","sex":1,"userId":16,"username":"sw2"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/01/wKgK4lrPIhmAXj8CAACGQ6XrTPc212.jpg","ctime":1523436240417,"locked":0,"password":"00278F41BD2A7DDD940D80A6EBB877F8","realname":"归档喽啰","salt":"3f67221d0d5a4dc9a9dc17e9d91dca6a","sex":1,"userId":18,"username":"sw4"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/01/wKgK4lrQIoqALeMUAAIBtaolgQo231.jpg","ctime":1523439432590,"locked":0,"password":"EBB6A6283BECAA763484B45B5ECEF539","realname":"lzh","salt":"fbf6f778d40a4d56a9b53fdc7045cdb1","sex":1,"userId":19,"username":"lzh"}]} upms:user:read 6b774f4d-9071-4443-a5b8-042e5e06aecc
95347 用户首页 admin 1529919517553 0 http://upms.zhangshuzheng.cn:1111 /manage/user/index http://upms.zhangshuzheng.cn:1111/manage/user/index GET Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 "/manage/user/index.jsp" upms:user:read 6b774f4d-9071-4443-a5b8-042e5e06aecc
95346 用户列表 admin 1529919495638 26 http://upms.zhangshuzheng.cn:1111 /manage/user/list http://upms.zhangshuzheng.cn:1111/manage/user/list GET sort=userId&order=asc&offset=0&limit=10 Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 {"total":17,"rows":[{"avatar":"http://192.168.10.226:8111/group1/M00/00/01/wKgK4lrUQ4eARjBJAAA8goGXIF0975.jpg","ctime":1,"email":"469741414@qq.com","locked":0,"password":"D13EFAD95FBA09971C3665C1C04F7C46","phone":"12345644444444478","realname":"admin","salt":"2fda5019e1e74fd1b90d14bee18bdc0e","sex":0,"userId":1,"username":"admin"},{"avatar":"/resources/zheng-admin/images/avatar.jpg","ctime":1,"email":"469741414@qq.com","locked":0,"password":"285C9762F5F9046F5893F752DFAF3476","phone":"123456","realname":"测试","salt":"d2d0d03310444ad388a8b290b0fe8564","sex":1,"userId":2,"username":"test"},{"avatar":"/resources/zheng-admin/images/avatar.jpg","ctime":1521633064135,"email":"404036459@qq.com","locked":0,"password":"AA96888C9725907F98EE856070E4714E","phone":"123456","realname":"王让123456","salt":"8762ea7bce62434d91bcc826a3b68fbb","sex":1,"userId":3,"username":"wr"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/06/wKgK4lrhoDGAa0tLAAZ6_qAk5xs261.jpg","ctime":1523340217954,"email":"2787718835@qq.com","locked":0,"password":"5F8829FF056E2DD4E63FE9A08930821B","phone":"123456","realname":"李四","salt":"2292a2f818f44e5a9642248c8fbf2ebb","sex":1,"userId":4,"username":"lisi"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/06/wKgK4lrhoFWAJMw4AAS_HAFOT78808.jpg","ctime":1523340356218,"email":"123456","locked":0,"password":"B6BB728594211683056D632583C60BD0","phone":"123456","realname":"王五","salt":"25fb83ab85124dedbabb48a16bf0f00d","sex":1,"userId":5,"username":"wangwu"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/06/wKgK4lrhoG-ASVLiAAMTwHdlt4g707.jpg","ctime":1523341083756,"email":"123456","locked":0,"password":"B2F78048FCE97D37A5DD214EDE5A2DB7","phone":"12345678","realna
me":"老六","salt":"8264132985a041d8bb806e15770d6900","sex":1,"userId":6,"username":"laoliu"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/01/wKgK4lrWoViAFJeBAAA4c3mXMS4922.png","ctime":1523427110963,"email":"2787718835@qq.com","locked":0,"password":"D13EFAD95FBA09971C3665C1C04F7C46","phone":"12345678","realname":"赵四","salt":"2fda5019e1e74fd1b90d14bee18bdc0e","sex":0,"userId":15,"username":"zhaosi"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/06/wKgK4lrhoIWAZsgZAAW6kkGANi4261.jpg","ctime":1523431468775,"email":"123456@11.com","locked":0,"password":"000B22923056B62D23A57B3F567078B5","phone":"123456","realname":"著录喽啰","salt":"3e49c877739345cc9898a6c74f3f1644","sex":1,"userId":16,"username":"sw2"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/01/wKgK4lrPIhmAXj8CAACGQ6XrTPc212.jpg","ctime":1523436240417,"locked":0,"password":"00278F41BD2A7DDD940D80A6EBB877F8","realname":"归档喽啰","salt":"3f67221d0d5a4dc9a9dc17e9d91dca6a","sex":1,"userId":18,"username":"sw4"},{"avatar":"http://192.168.10.226:8111/group1/M00/00/01/wKgK4lrQIoqALeMUAAIBtaolgQo231.jpg","ctime":1523439432590,"locked":0,"password":"EBB6A6283BECAA763484B45B5ECEF539","realname":"lzh","salt":"fbf6f778d40a4d56a9b53fdc7045cdb1","sex":1,"userId":19,"username":"lzh"}]} upms:user:read 6b774f4d-9071-4443-a5b8-042e5e06aecc
95345 用户首页 admin 1529919494212 8 http://upms.zhangshuzheng.cn:1111 /manage/user/index http://upms.zhangshuzheng.cn:1111/manage/user/index GET Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 "/manage/user/index.jsp" upms:user:read 6b774f4d-9071-4443-a5b8-042e5e06aecc
95344 系统列表 admin 1529919394684 73 http://upms.zhangshuzheng.cn:1111 /manage/system/list http://upms.zhangshuzheng.cn:1111/manage/system/list GET sort=systemId&order=asc&offset=0&limit=10 Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 {"total":2,"rows":[{"banner":"http://192.168.10.226:8111/group1/M00/00/06/wKgK4lrYRkyAbNBDAABZE6_qhVI301.png","basepath":"http://upms.zhangshuzheng.cn:1111","ctime":1,"description":"用户权限管理系统(RBAC细粒度用户权限、统一后台、单点登录、会话管理)","icon":"zmdi zmdi-shield-security","name":"zheng-upms-server","orders":1,"status":1,"systemId":1,"theme":"#29a176","title":"权限管理系统"},{"banner":"/resources/zheng-admin/images/zheng-cms.png","basepath":"http://cms.zhangshuzheng.cn:2222","ctime":2,"description":"内容管理系统(门户、博客、论坛、问答等)","icon":"zmdi zmdi-wikipedia","name":"zheng-cms-admin","orders":2,"status":1,"systemId":2,"theme":"#455EC5","title":"内容管理系统"}]} upms:system:read 6b774f4d-9071-4443-a5b8-042e5e06aecc
95343 系统首页 admin 1529919392112 16 http://upms.zhangshuzheng.cn:1111 /manage/system/index http://upms.zhangshuzheng.cn:1111/manage/system/index GET Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 "/manage/system/index.jsp" upms:system:read 6b774f4d-9071-4443-a5b8-042e5e06aecc
95342 后台首页 admin 1529919379219 152 http://upms.zhangshuzheng.cn:1111 /manage/index http://upms.zhangshuzheng.cn:1111/manage/index GET Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 "/manage/index.jsp" 6b774f4d-9071-4443-a5b8-042e5e06aecc
95341 登录 admin 1529919377049 1483 http://upms.zhangshuzheng.cn:1111 /sso/login http://upms.zhangshuzheng.cn:1111/sso/login POST {validateCode=[q48x],password=[123456],rememberMe=[false],backurl=[],username=[admin]} Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 {"code":1,"data":"http://upms.zhangshuzheng.cn:1111","message":"success"} 6b774f4d-9071-4443-a5b8-042e5e06aecc
95340 登录 1529919354643 19 http://upms.zhangshuzheng.cn:1111 /sso/login http://upms.zhangshuzheng.cn:1111/sso/login GET Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 127.0.0.1 "/sso/login.jsp" 6b774f4d-9071-4443-a5b8-042e5e06aecc
1. 日志清洗
package cn.itcast.bigdata.hive.mr.pre;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import cn.itcast.bigdata.hive.mrbean.WebLogBean;
import cn.itcast.bigdata.hive.mrbean.WebLogParser;
/**
 * First cleaning pass over the raw log: every input line is parsed into a
 * {@link WebLogBean} and only the records the parser flags as valid are kept.
 *
 * <p>Usage: {@code WeblogPreValid <input path> <output path>}
 *
 * <p>The process exit status reflects the job result (0 = success, 1 = job
 * failed, 2 = bad arguments) so that a scheduler can detect failures.
 */
public class WeblogPreValid {

    /** Parses each raw log line and emits the resulting bean as the key when it is valid. */
    static class WeblogPreProcessMapper extends Mapper<LongWritable, Text, WebLogBean, NullWritable> {

        /**
         * Parses one input line and writes it out unless it is marked invalid.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one raw log line
         * @param context task context used to emit the parsed bean
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            WebLogBean webLogBean = WebLogParser.parser(value.toString());
            // Records flagged as invalid by the parser are not written.
            if (webLogBean.isValid()) {
                context.write(webLogBean, NullWritable.get());
            }
        }
    }

    /** Writes the key of every key group once and discards the (empty) values. */
    static class WeblogPreProcessReducer extends Reducer<WebLogBean, NullWritable, WebLogBean, NullWritable> {

        @Override
        protected void reduce(WebLogBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }

    /**
     * Configures and runs the job, then exits with a status describing the outcome.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: WeblogPreValid <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(WeblogPreValid.class);

        job.setMapperClass(WeblogPreProcessMapper.class);
        job.setReducerClass(WeblogPreProcessReducer.class);

        job.setMapOutputKeyClass(WebLogBean.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(WebLogBean.class);
        job.setOutputValueClass(NullWritable.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Propagate the job result; previously a failed job still exited with 0.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
2. 统计 PV
package cn.itcast.bigdata.hive.mr.pre;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import cn.itcast.bigdata.hive.mrbean.WebLogBean;
import cn.itcast.bigdata.hive.mrbean.WebLogParser;
/**
 * Map-only job that parses each input line into a {@link WebLogBean}, runs the
 * static-resource filter over it and writes the text form of every record that
 * is still valid afterwards.
 *
 * <p>NOTE(review): the original description also lists time-format conversion,
 * default values for missing fields and valid/invalid marking; that work would
 * happen inside {@code WebLogParser}, which is not visible here — confirm.
 *
 * <p>Usage: {@code WeblogPreProcess [input path [output path]]}. Paths that are
 * not supplied fall back to the local defaults below.
 */
public class WeblogPreProcess {

    /** Input path used when no first argument is given. */
    private static final String DEFAULT_INPUT = "c:/weblog/zhengoutput";

    /** Output path used when no second argument is given. */
    private static final String DEFAULT_OUTPUT = "c:/weblog/zhengoutputPre";

    /** Parses, filters and re-emits log records as text. */
    static class WeblogPreProcessMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

        // Page URLs handed to WebLogParser.filtStaticResource.
        // NOTE(review): the original comments call these "URL classification data"
        // and "data to be filtered out"; how the helper uses them is not visible here.
        private final Set<String> pages = new HashSet<String>();

        // Reused output key/value instances.
        private final Text outKey = new Text();
        private final NullWritable outValue = NullWritable.get();

        /** Fills {@link #pages} with the fixed list of page URLs. */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            pages.add("/about");
            pages.add("/black-ip-list/");
            pages.add("/cassandra-clustor/");
            pages.add("/finance-rhive-repurchase/");
            pages.add("/hadoop-family-roadmap/");
            pages.add("/hadoop-hive-intro/");
            pages.add("/hadoop-zookeeper-intro/");
            pages.add("/hadoop-mahout-roadmap/");
        }

        /**
         * Parses one line, applies the static-resource filter and emits the
         * record's {@code toString()} form when it is valid.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one log line
         * @param context task context used to emit the record
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            WebLogBean webLogBean = WebLogParser.parser(value.toString());
            // Filter js / image / css style static resources.
            WebLogParser.filtStaticResource(webLogBean, pages);
            if (!webLogBean.isValid()) {
                return;
            }
            outKey.set(webLogBean.toString());
            context.write(outKey, outValue);
        }
    }

    /**
     * Configures and runs the map-only job, then exits with 0 on success or 1 on failure.
     *
     * @param args optional {@code args[0]} input path and {@code args[1]} output path
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        // Command-line paths take precedence; the former hard-coded paths remain as defaults.
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(WeblogPreProcess.class);

        job.setMapperClass(WeblogPreProcessMapper.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // No reduce phase: mapper output is written directly.
        job.setNumReduceTasks(0);

        // Propagate the job result; previously a failed job still exited with 0.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
3. 统计 visit 模型
package cn.itcast.bigdata.hive.mr;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import cn.itcast.bigdata.hive.mrbean.PageViewsBean;
import cn.itcast.bigdata.hive.mrbean.VisitBean;
/**
 * Derives the visit model from the page-view model output: one output record
 * per session holding its entry/exit page and time, page count, remote address
 * and referal.
 *
 * <p>Usage: {@code ClickStreamVisit <input path> <output path>}
 */
public class ClickStreamVisit {

    /** Emits every page view keyed by its session so one reducer call sees a whole session. */
    static class ClickStreamVisitMapper extends Mapper<LongWritable, Text, Text, PageViewsBean> {

        /** Highest field index read below is 18, so at least 19 fields are required. */
        private static final int MIN_FIELDS = 19;

        // Reused output instances.
        PageViewsBean pvBean = new PageViewsBean();
        Text k = new Text();

        /**
         * Splits one {@code \001}-delimited line and emits it as a {@link PageViewsBean}.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one page-view line
         * @param context task context used to emit the bean
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split("\001");
            if (fields.length < MIN_FIELDS) {
                // Blank or truncated line: count it and move on instead of failing the task.
                context.getCounter("ClickStreamVisit", "MALFORMED_LINES").increment(1);
                return;
            }

            // Argument order of PageViewsBean.set, as noted by the original author:
            // session, remote_addr, useragent, timestr, request, step, staylong,
            // referal, bytes_send, status
            pvBean.set(fields[15], fields[12], fields[11],
                    fields[4], fields[7], Integer.valueOf(fields[18]), fields[5],
                    fields[17], "", "200");

            k.set(pvBean.getSession());
            context.write(k, pvBean);
        }
    }

    /** Collapses all page views of one session into a single {@link VisitBean}. */
    static class ClickStreamVisitReducer extends Reducer<Text, PageViewsBean, NullWritable, VisitBean> {

        /**
         * Orders the session's page views by step and writes the visit summary.
         *
         * @param session the session id
         * @param pvBeans all page views recorded for that session
         * @param context task context used to emit the visit
         * @throws IOException if a page view cannot be copied, or on write failure
         */
        @Override
        protected void reduce(Text session, Iterable<PageViewsBean> pvBeans, Context context) throws IOException, InterruptedException {
            // Hadoop reuses the value instance while iterating, so every element is copied before it is stored.
            ArrayList<PageViewsBean> pvBeansList = new ArrayList<PageViewsBean>();
            for (PageViewsBean pvBean : pvBeans) {
                PageViewsBean copy = new PageViewsBean();
                try {
                    BeanUtils.copyProperties(copy, pvBean);
                } catch (Exception e) {
                    // Dropping the record silently would produce a wrong visit; fail with the cause instead.
                    throw new IOException("Cannot copy page view of session " + session, e);
                }
                pvBeansList.add(copy);
            }

            // Sort by step. Integer.compare returns 0 for equal steps, which the
            // Comparator contract requires (the former "a > b ? 1 : -1" never did).
            Collections.sort(pvBeansList, new Comparator<PageViewsBean>() {
                @Override
                public int compare(PageViewsBean o1, PageViewsBean o2) {
                    return Integer.compare(o1.getStep(), o2.getStep());
                }
            });

            // First and last page view of this visit.
            PageViewsBean first = pvBeansList.get(0);
            PageViewsBean last = pvBeansList.get(pvBeansList.size() - 1);

            VisitBean visitBean = new VisitBean();
            visitBean.setInPage(first.getRequest());
            visitBean.setInTime(first.getTimestr());
            visitBean.setOutPage(last.getRequest());
            visitBean.setOutTime(last.getTimestr());
            // Number of pages seen during the visit.
            visitBean.setPageVisits(pvBeansList.size());
            // Visitor address and referal are taken from the first page view.
            visitBean.setRemote_addr(first.getRemote_addr());
            visitBean.setReferal(first.getReferal());
            visitBean.setSession(session.toString());

            context.write(NullWritable.get(), visitBean);
        }
    }

    /**
     * Configures and runs the job, then exits with 0 on success, 1 on job
     * failure or 2 on bad arguments.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: ClickStreamVisit <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(ClickStreamVisit.class);

        job.setMapperClass(ClickStreamVisitMapper.class);
        job.setReducerClass(ClickStreamVisitReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(PageViewsBean.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(VisitBean.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}