js统计网站访问,会员圈选,find 定位时间段内文件,grep过滤内容,gawk 排除,去重,格式化输出

最近的工作是带一个商城项目,客户需要圈选访问过商城的会员:比如访问了 A 页面但没有访问 B 页面的会员,或者访问了 A 页面并且触发了 A1 事件的会员,等等。

或者是url加有标识,从某一外部资源访问 进来的会员是哪些。

市面上的统计软件(比如百度统计、站长统计、腾讯统计等)都不支持圈选,无法满足业务需求,所以自建了一套系统来做会员圈选。

前端收集数据js 

<script>
	// Namespace object of the tracker; pages report data through _wx_tongji_.addEvent().
	var _wx_tongji_={};
	/**
	 * Sends one visit/event record to the statistics endpoint as an
	 * asynchronous multipart POST.
	 *
	 * @param {string} ev   Record kind, e.g. "access".
	 * @param {string} type Category of the record, e.g. "goods" or "page".
	 * @param {*} [val]     Value attached to the record; any falsy value is sent as 0.
	 *
	 * Relies on the page-level global `ctx` (prefix of the endpoint URL) and
	 * on `location.href`, which is passed through encodeURI before sending.
	 */
	_wx_tongji_.addEvent=function(ev,type,val){
		// Form fields, appended in this exact order.
		var fields=[
			['ev',ev],
			['type',type],
			['val',val||0],
			['url',encodeURI(location.href)]
		];
		var payload = new FormData();
		fields.forEach(function(pair){
			payload.append(pair[0],pair[1]);
		});

		var request = new XMLHttpRequest();
		// Give up after 3 seconds and read the response as plain text.
		request.timeout = 3000;
		request.responseType = "text";
		// Third argument `true` makes the request asynchronous.
		request.open('POST', ctx+'/statistics/tongji.do', true);
		// Header X-Requested-With: XMLHttpRequest
		request.setRequestHeader("X-Requested-With", "XMLHttpRequest");
		// Only successful (200) or not-modified (304) responses are echoed to the console.
		request.onload = function(e) {
			var succeeded = this.status == 200 || this.status == 304;
			if(!succeeded){
				return;
			}
			console.log(e.target.responseText);
		};
		request.send(payload);
	};
	/**
	 * Reads one parameter from the query string of the current page
	 * (window.location.search). The name is matched case-insensitively.
	 *
	 * @param {string} name Parameter name, matched literally.
	 * @returns {string|null} The decoded value, or null when the parameter
	 *   is absent. A value containing a malformed percent-escape is returned
	 *   undecoded instead of throwing.
	 */
	function getUrlParam(name) {
		// Escape regex metacharacters so names such as "a.b" only match themselves.
		var literalName = String(name).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
		var reg = new RegExp("(^|&)" + literalName + "=([^&]*)(&|$)", "i");
		// slice(1) drops the leading "?" of location.search.
		var r = window.location.search.slice(1).match(reg);
		if (r === null) return null;
		try {
			// decodeURIComponent (unlike decodeURI) also decodes reserved
			// characters such as %2F, %26 and %3D inside a parameter value.
			return decodeURIComponent(r[2]);
		} catch (e) {
			// Malformed escape sequence: hand back the raw text.
			return r[2];
		}
	}
	// Runs once when the script loads: if the page was opened with a
	// "from" query parameter starting with "wxrw", report it as an
	// access/wxrw record carrying the parameter value.
	(function(){
		var source=getUrlParam("from");
		if(!source){
			return;
		}
		if(!source.startsWith("wxrw")){
			return;
		}
		_wx_tongji_.addEvent("access","wxrw",source);
	})();
</script>

前端页面通过 _wx_tongji_.addEvent() 来提交访问以及事件数据

后台接收数据,并通过 logback 写入日志

/**
 * Receives one tracking record from the front end and writes it to the
 * statistics log as a single space-separated line:
 * {@code ev type val openid mobile url}.
 *
 * Every field is stripped of whitespace before it is written and an
 * absent/blank field is written as "0", so a request parameter can neither
 * shift the columns of the record nor start a new log line.
 *
 * @param request current HTTP request; reads parameters "url", "ev", "type" and "val"
 * @return always {@code null}
 */
public String tongji(HttpServletRequest request){
		// The url is decoded twice, as before; a missing parameter is no longer decoded at all.
		String rawUrl=request.getParameter("url");
		String url=rawUrl==null?null:EncodeUtils.urlDecode(EncodeUtils.urlDecode(rawUrl));

		String ev=request.getParameter("ev");
		String type=request.getParameter("type");
		String val=request.getParameter("val");

		String openid=ServletUtils.getOpenId();
		JSONObject user=ServletUtils.getUser();
		String mobile=user==null?null:user.getString("mobile");

		StringBuilder line=new StringBuilder();
		line.append(tongjiLogField(ev)).append(' ');
		line.append(tongjiLogField(type)).append(' ');
		line.append(tongjiLogField(val)).append(' ');
		line.append(tongjiLogField(openid)).append(' ');
		line.append(tongjiLogField(mobile)).append(' ');
		line.append(tongjiLogField(url));
		logger.info(line.toString());
		return null;
	}

	/**
	 * Makes a value safe to use as one column of the space-delimited log:
	 * removes all whitespace (including line breaks) and substitutes "0"
	 * for a null or empty value.
	 */
	private static String tongjiLogField(String raw){
		if(raw==null)
			return "0";
		String cleaned=raw.replaceAll("\\s+", "");
		return cleaned.isEmpty()?"0":cleaned;
	}

日志记录数据格式如下

年月日  时分秒   访问/事件   类型   值  ID  手机号  具体访问url

170719 18:22:19 access goods 8498 id1111 手机号11111  https://url11111
170719 18:22:19 access goods 8081 id2222 手机号22222  https://url22222


上图为 前台页面查询 输入 以及反馈结果,数字为 对应子对象的数据处理结果。

前台页面提交后台的参数为json,提交的时候 result 以及result_value 值为空, 这2个值为后台执行分析之后的反馈结果记录

{
    "result":"/var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.6688393061188297.log",
    "flag":0,
    "result_value":"278674",
    "sons":[
        {
            "result":"/var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.9578616973272448.log",
            "search":{
                "result":"/var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.34378951503811495.log",
                "flag":0,
                "start":"201706091948",
                "result_value":"287696",
                "sons":[
                    {
                        "val":"9235",
                        "result":"/var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.2874368262364775.log",
                        "ev":"access",
                        "result_value":"9061",
                        "type":"goods"
                    },
                    {
                        "val":"1188",
                        "result":"/var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.10730146303857035.log",
                        "ev":"access",
                        "result_value":"286835",
                        "type":"page"
                    }
                ],
                "end":"201707191948"
            },
            "result_value":"278635",
            "filter":{
                "result":"/var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.9342168703806003.log",
                "flag":0,
                "result_value":"9061",
                "sons":[
                    {
                        "result":"/var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.9342168703806003.log",
                        "flag":1,
                        "start":"201707042020",
                        "result_value":"9061",
                        "sons":[
                            {
                                "val":"9235",
                                "result":"/var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.9342168703806003.log",
                                "ev":"access",
                                "result_value":"9061",
                                "type":"goods"
                            }
                        ],
                        "end":"201707192020"
                    }
                ]
            }
        },
        {
            "result":"/var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.027657177982776227.log",
            "search":{
                "result":"/var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.027657177982776227.log",
                "flag":1,
                "result_value":"223",
                "start":"201707171951",
                "sons":[
                    {
                        "val":"9699",
                        "result":"/var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.027657177982776227.log",
                        "ev":"access",
                        "result_value":"223",
                        "type":"goods"
                    }
                ],
                "end":"201707191951"
            },
            "result_value":"223",
            "filter":{
                "flag":0,
                "result_value":"",
                "sons":[

                ]
            }
        }
    ],
    "html":"https://abc//tp20170719203249/201707192032490.6688393061188297"
}


后台接收参数并解析处理的代码

 /**
  * Static entry point: forwards the JSON query text to
  * {@code service.toSearch} and returns whatever it produces.
  *
  * @param searchValueStr query conditions as a JSON string
  * @return the string returned by {@code toSearch}
  */
 public static String   search(String searchValueStr){
        final String responseJson=service.toSearch(searchValueStr);
        return responseJson;
    }
    
    /**
     * Runs one complete member-selection query.
     *
     * Parses the JSON conditions, lets {@code m_search} translate them into
     * shell commands, appends two post-processing commands (HTML copy of the
     * final result, line counts of every .log file), executes the script and
     * writes the counts and the public result address back into the JSON.
     *
     * NOTE(review): the working directory name only has second resolution
     * ("yyyyMMddHHmmss"), so two queries started in the same second would
     * share a directory.
     *
     * @param searchValueStr query conditions as a JSON string
     * @return the same JSON, enriched with "result", "result_value" and "html"
     */
    String   toSearch(String searchValueStr){
        JSONObject query=JSONObject.parseObject(searchValueStr);

        // Shell commands collected while walking the query tree
        List<String> script=new ArrayList<>();
        // Per-query working directory
        String stamp=new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
        String workDir=searchPath+"/tp"+stamp;

        String finalLog=m_search(query, script, workDir,null);
        query.put("result", finalLog);

        // HTML copy of the final result: every line gets a trailing </br>
        String finalHtml=finalLog.replace(".log", ".html");
        script.add(" gawk '{print $0,\" </br>\"}' "+finalLog+" > "+finalHtml);
        // Line count of every result file, written as "file = count"
        script.add(" wc -l "+workDir+"/*.log |gawk '{print $2,\"=\",$1}'> "+workDir+"/result.properties");

        execCommand(workDir, script);
        // Attach the line count of each result file to the matching JSON node
        setResult(query, getResultCount(workDir));

        // Address under which the front end can open the final result
        String publicUrl=domainPath+finalLog.replace(searchPath, "").replace(".log", "");
        query.put("html", publicUrl);
        return query.toJSONString();
    }
    
    /**
     * Walks the query JSON recursively and, next to every "result" entry,
     * stores a "result_value" entry holding the count looked up in
     * {@code prop} under the result's file name ("0" when not present).
     *
     * @param params JSON node to annotate (modified in place)
     * @param prop   file name to line count mapping
     */
    void setResult(JSONObject params,Properties prop){
        // Iterate over a snapshot of the keys: "result_value" is added during the walk.
        List<String> snapshot=new ArrayList<String>(params.keySet());
        for(String name:snapshot){
            Object node=params.get(name);
            if("result".equals(name)){
                String count=prop.getProperty(node.toString(),"0");
                params.put("result_value", count);
                continue;
            }
            if(node instanceof JSONArray){
                JSONArray children=(JSONArray) node;
                int total=children.size();
                for(int idx=0;idx<total;idx++){
                    setResult(children.getJSONObject(idx), prop);
                }
                continue;
            }
            if(node instanceof JSONObject){
                setResult((JSONObject) node, prop);
            }
        }
    }
    
    /**
     * Loads {@code <path>/result.properties} (file name to line count, as
     * written by the generated script) into a {@link Properties} object.
     *
     * The stream is opened in try-with-resources so it is always closed;
     * the previous version closed a variable that was never assigned and
     * leaked the real stream.
     *
     * @param path directory that contains result.properties
     * @return the loaded properties; empty when the file is missing or unreadable
     *         (the error is printed, not thrown)
     */
    Properties getResultCount(String path) {
        Properties props = new Properties();
        File countsFile = new File(path+"/result.properties");
        try (InputStream in = new BufferedInputStream(new FileInputStream(countsFile))) {
            props.load(in);
        } catch (IOException | IllegalArgumentException e) {
            // Best effort: an unreadable or malformed file yields whatever was loaded so far.
            e.printStackTrace();
        }

        // Diagnostic dump of the loaded counts (kept from the original implementation).
        for(Object key:props.keySet())
            System.out.println(key.toString()+":"+props.getProperty(key.toString()));

        System.out.println("props");
        return props;
    }
    
    /**
     * Recursively translates one node of the query tree into shell commands
     * and returns the name of the file that will hold the node's result.
     *
     * Three node shapes are handled:
     * <ul>
     *   <li>no "sons" and no "search": a leaf condition, delegated to
     *       {@code DataSearchUtil.sonSearch};</li>
     *   <li>no "sons" but a "search": the "search" sub-tree is evaluated and,
     *       when "filter" has sons, the filter's result is subtracted from it;</li>
     *   <li>"sons" present: every son is evaluated and the results are
     *       combined with AND when "flag" is 1, otherwise with OR.</li>
     * </ul>
     * The chosen result file name is also stored in the node under "result".
     *
     * A missing "flag" is treated as OR; previously it caused a
     * NullPointerException when the Integer was unboxed.
     *
     * @param search  node to evaluate (gets a "result" entry)
     * @param command list the generated commands are appended to
     * @param path    working directory for result files
     * @param parent  node passed on to the leaf search (it reads "start"/"end" from it)
     * @return result file name of this node
     */
    String   m_search(JSONObject search,List<String> command,String path,JSONObject parent){
        JSONArray sons=search.getJSONArray("sons");
        JSONObject  _search=search.getJSONObject("search");
        boolean noSons=sons==null||sons.size()==0;

        // Leaf condition: neither children nor a nested search object
        if(noSons&&_search==null){
            String _result=DataSearchUtil.sonSearch(search, command, path, parent);
            search.put("result", _result);
            return _result;
        }

        // Search-plus-filter wrapper: evaluate "search", then subtract "filter" if it has sons
        if(noSons){
            String _result_search=m_search(_search, command, path,_search);
            String _result_exclude=_result_search;
            JSONObject  _filter=search.getJSONObject("filter");
            JSONArray _sons=_filter==null?null:_filter.getJSONArray("sons");
            if(_sons!=null&&_sons.size()>0){
                String _result_filter=m_search(_filter, command, path,null);
                _result_exclude=searchExclude(_result_search,_result_filter, command,path);
            }
            search.put("result", _result_exclude);
            return _result_exclude;
        }

        // Group node: true = AND, false (or flag absent) = OR
        boolean flag=Integer.valueOf(1).equals(search.getInteger("flag"));
        // Result file of every child, in order
        List<String> result_fileNames=new ArrayList<>();
        for(int j=0;j<sons.size();j++){
            JSONObject s_search=sons.getJSONObject(j);
            result_fileNames.add(m_search(s_search,command,path,search));
        }

        String res;
        if(result_fileNames.size()==1)
            // A single child needs no combining step
            res= result_fileNames.get(0);
        else
            res=flag?searchAnd(result_fileNames, command,path):searchOr(result_fileNames, command,path);
        search.put("result", res);
        return res;
    }
    
    
    
    /**
     * Appends a gawk command that writes the union (each distinct first
     * field once) of the given result files into a new result file.
     *
     * @param fileNames result files to merge
     * @param command   list the generated command is appended to
     * @param path      working directory for the new result file
     * @return name of the file that will receive the union
     */
    String searchOr(List<String> fileNames,List<String> command,String path){
        StringBuilder script=new StringBuilder(" gawk  '{a[$1]+=1}END{for(i in a)  print i}' ");
        for(String input:fileNames){
            script.append(input).append(" ");
        }

        String output=createFileName(path);
        script.append(">").append(output);
        command.add(script.toString());
        return output;
    }
    
    /**
     * Appends a gawk command that writes the intersection of the given
     * result files into a new result file: a first-field value is printed
     * only when it was counted exactly as many times as there are files.
     *
     * @param fileNames result files to intersect
     * @param command   list the generated command is appended to
     * @param path      working directory for the new result file
     * @return name of the file that will receive the intersection
     */
    String searchAnd(List<String> fileNames,List<String> command,String path){
        StringBuilder script=new StringBuilder();
        // Occurrence count must equal the number of input files
        script.append(" gawk  '{a[$1]+=1}END{for(i in a) if(a[i]==")
              .append(fileNames.size())
              .append(") print i}' ");

        for(String input:fileNames){
            script.append(input).append(" ");
        }

        String output=createFileName(path);
        script.append(">").append(output);
        command.add(script.toString());
        return output;
    }
    
    /**
     * Appends a gawk command that writes every line of {@code search} that
     * does not occur in {@code filter} into a new result file.
     *
     * Shape of the generated command:
     * gawk '{if(ARGIND==1){a[$0]}else{if($0 in a) delete a[$0]}}END{for (i in a) print i}' a.log b.log
     * Lines of the first file are collected in an array; lines of any later
     * file are removed from it; what is left is printed at the end.
     *
     * @param search  result file to start from
     * @param filter  result file whose lines are removed
     * @param command list the generated command is appended to
     * @param path    working directory for the new result file
     * @return name of the file that will receive the difference
     */
    String searchExclude(String search,String filter,List<String> command ,String path){
        StringBuilder script=new StringBuilder(" gawk '{if(ARGIND==1){a[$0]}else{if($0 in a) delete a[$0]}}END{for (i in a) print i}' ");
        script.append(search).append(" ").append(filter);

        // Output file for the result after exclusion
        String output=createFileName(path);
        script.append(" > ").append(output);
        command.add(script.toString());
        return output;
    }
    /**
     * Writes the collected commands to {@code <path>/command.sh}, one per
     * line, and runs the script with /bin/sh, blocking until it finishes.
     *
     * Changes over the previous version:
     * the script writer is closed even when writing fails; stderr is merged
     * into stdout and the output is drained while the script runs, so a
     * chatty command cannot fill the pipe and block {@code waitFor} forever;
     * a non-zero exit status is reported; an interrupt is restored on the
     * current thread instead of being swallowed.
     *
     * @param path    working directory (created if necessary)
     * @param command shell commands, in execution order
     */
    void execCommand(String path,List<String>command){
        try {
            // Create the working directory, including missing parents
            new File(path).mkdirs();

            // Write the script file
            File file=new File(path+"/command.sh");
            try (BufferedWriter bw = new BufferedWriter(new FileWriter(file.getAbsoluteFile()))) {
                for(String com:command){
                    bw.write(com);
                    bw.write("\n");
                }
            }
            // Make the script executable
            file.setExecutable(true);

            // Run it; stderr is merged into stdout so a single drain loop suffices
            ProcessBuilder builder=new ProcessBuilder("/bin/sh","-c",path+"/command.sh");
            builder.redirectErrorStream(true);
            Process p=builder.start();
            try (InputStream out = p.getInputStream()) {
                byte[] sink=new byte[4096];
                while(out.read(sink)!=-1){
                    // output is discarded; reading keeps the child from blocking
                }
            }
            int exitCode=p.waitFor();
            if(exitCode!=0)
                System.err.println("command.sh in "+path+" exited with status "+exitCode);
            p.destroy();
        } catch (InterruptedException e) {
            // Preserve the interrupt for callers further up the stack
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
   /**
    * Builds the shell text that selects the log files modified inside a
    * time window. Two marker files are created with {@code touch -t} and
    * {@code find -newer start ! -newer end} picks the *.log files between them:
    * <pre>
    * touch -t 201707070000 .../start_N;
    * touch -t 201707100000 .../end_N;
    * find LOGPATH -name '*.log' -type f -newer .../start_N ! -newer .../end_N
    * </pre>
    *
    * {@code start} and {@code end} come from the query JSON and are placed
    * into shell text, so they are validated first: only the
    * {@code touch -t} form [[CC]YY]MMDDhhmm[.ss] is accepted.
    *
    * @param path  working directory in which the marker files are created
    * @param start window start, e.g. "201707070000"
    * @param end   window end, e.g. "201707100000"
    * @return the touch/find command text (without a trailing newline)
    * @throws IllegalArgumentException when start or end is null or not a valid timestamp
    */
   public static String getCommandFind(String path,String start,String end){
        requireTouchTimestamp("start", start);
        requireTouchTimestamp("end", end);

        // Marker files get a random suffix so several windows can coexist in one directory
        String start_file=path+"/start_"+(int)(Math.random()*100000);
        String end_file=path+"/end_"+(int)(Math.random()*100000);

        String t_start=" touch -t "+start+" "+ start_file;
        String t_end=" touch -t "+end+" "+ end_file;
        return t_start+";\n"+t_end+";\n"+" find "+logPath+" -name '*.log' -type f -newer  "+start_file+" ! -newer "+end_file;
    }

   /** Rejects anything that is not a plain touch -t timestamp ([[CC]YY]MMDDhhmm[.ss]). */
   private static void requireTouchTimestamp(String label,String value){
        if(value==null||!value.matches("(\\d{2}){0,2}\\d{8}(\\.\\d{2})?"))
            throw new IllegalArgumentException("invalid "+label+" time: "+value);
    }
   
   
   
   /**
    * Manual smoke test: loads and prints the counts found in
    * {@code d:\result.properties}.
    */
   public static void main(String[] args) {
       new TongjiService().getResultCount("d:\\");
   }


   /**
    * Generates the command for one leaf condition: find the log files in the
    * parent's time window, grep the lines of the requested event and print
    * the distinct values of the 6th column into a new result file, e.g.
    * <pre>
    * find ... | xargs grep 'access goods 1848 ' | gawk -F ' ' '{a[$6]}END{for(i in a) print i}' > a.log
    * </pre>
    *
    * A {@code val} of "-1", an empty {@code val} or a missing {@code val}
    * means "any value": the pattern is then {@code 'ev type '}. (Previously
    * this case produced a pattern with two trailing spaces, which can never
    * match a log line.)
    *
    * The pattern is built from query input, so single quotes in it are
    * escaped before it is placed inside the quoted grep argument. grep still
    * interprets the pattern as a regular expression.
    *
    * @param s_search leaf condition; reads "ev", "type" and "val"
    * @param command  list the generated command is appended to
    * @param path     working directory for marker and result files
    * @param parent   node that carries the time window; reads "start" and "end"
    * @return name of the file that will receive the matching ids
    * @throws IllegalArgumentException when {@code parent} is null
    */
   public String sonSearch(JSONObject s_search, List<String> command, String path,JSONObject parent) {
        if(parent==null)
            throw new IllegalArgumentException("a leaf condition needs a parent that defines start/end");

        // Time window of the enclosing group
        String start = parent.getString("start");
        String end = parent.getString("end");
        String command_find = TongjiService.getCommandFind(path, start, end);

        String ev=s_search.getString("ev");
        String type=s_search.getString("type");
        String val=s_search.getString("val");

        // The trailing space after each column keeps "goods 92" from matching "goods 9235"
        boolean anyValue=val==null||val.isEmpty()||"-1".equals(val);
        String pattern=ev+" "+type+" "+(anyValue?"":val+" ");
        // Inside single quotes only the quote itself needs escaping: ' becomes '\''
        String quotedPattern="'"+pattern.replace("'", "'\\''")+"'";

        String fileName=TongjiService.createFileName(path);
        String command_grep_gawk=command_find+" | xargs grep "+quotedPattern+" | gawk -F ' ' '{a[$6]}END{for(i in a) print i}' >"+fileName;
        command.add(command_grep_gawk);
        return fileName;
    }

整体逻辑,接受参数,解析内部逻辑得到所有需要 执行分析的命令,最终生成命令脚本文件,执行脚本。


某一个子查询的结果 如上面的 json 参数以及对应结果

总用量 42396
-rw-r--r--. 1 root root     6440 7月  19 20:32 201707192032490.027657177982776227.log
-rw-r--r--. 1 root root  8318105 7月  19 20:32 201707192032490.10730146303857035.log
-rw-r--r--. 1 root root   262742 7月  19 20:32 201707192032490.2874368262364775.log
-rw-r--r--. 1 root root  8343074 7月  19 20:32 201707192032490.34378951503811495.log
-rw-r--r--. 1 root root 10032154 7月  19 20:32 201707192032490.6688393061188297.html
-rw-r--r--. 1 root root  8081436 7月  19 20:32 201707192032490.6688393061188297.log
-rw-r--r--. 1 root root   262742 7月  19 20:32 201707192032490.9342168703806003.log
-rw-r--r--. 1 root root  8080332 7月  19 20:32 201707192032490.9578616973272448.log
-rwxr--r--. 1 root root     3465 7月  19 20:32 command.sh
-rw-r--r--. 1 root root        0 7月  19 19:51 end_50495
-rw-r--r--. 1 root root        0 7月  19 19:48 end_71400
-rw-r--r--. 1 root root        0 7月  19 19:48 end_96108
-rw-r--r--. 1 root root        0 7月  19 20:20 end_98721
-rw-r--r--. 1 root root      689 7月  19 20:32 result.properties
-rw-r--r--. 1 root root        0 7月  17 19:51 start_13790
-rw-r--r--. 1 root root        0 7月   4 20:20 start_21328
-rw-r--r--. 1 root root        0 6月   9 19:48 start_58068
-rw-r--r--. 1 root root        0 6月   9 19:48 start_76742
command.sh 
 touch -t 201706091948 /var/log/tomcat/wx_tongji_search/tp20170719203249/start_58068;
 touch -t 201707191948 /var/log/tomcat/wx_tongji_search/tp20170719203249/end_71400;
 find /var/log/tomcat/wx_tongji -name '*.log' -type f -newer  /var/log/tomcat/wx_tongji_search/tp20170719203249/start_58068 ! -newer /var/log/tomcat/wx_tongji_search/tp20170719203249/end_71400 | xargs grep 'access goods 9235 ' | gawk -F ' ' '{a[$6]}END{for(i in a) print i}' >/var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.2874368262364775.log
 touch -t 201706091948 /var/log/tomcat/wx_tongji_search/tp20170719203249/start_76742;
 touch -t 201707191948 /var/log/tomcat/wx_tongji_search/tp20170719203249/end_96108;
 find /var/log/tomcat/wx_tongji -name '*.log' -type f -newer  /var/log/tomcat/wx_tongji_search/tp20170719203249/start_76742 ! -newer /var/log/tomcat/wx_tongji_search/tp20170719203249/end_96108 | xargs grep 'access page 1188 ' | gawk -F ' ' '{a[$6]}END{for(i in a) print i}' >/var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.10730146303857035.log
 gawk  '{a[$1]+=1}END{for(i in a)  print i}' /var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.2874368262364775.log /var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.10730146303857035.log >/var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.34378951503811495.log
 touch -t 201707042020 /var/log/tomcat/wx_tongji_search/tp20170719203249/start_21328;
 touch -t 201707192020 /var/log/tomcat/wx_tongji_search/tp20170719203249/end_98721;
 find /var/log/tomcat/wx_tongji -name '*.log' -type f -newer  /var/log/tomcat/wx_tongji_search/tp20170719203249/start_21328 ! -newer /var/log/tomcat/wx_tongji_search/tp20170719203249/end_98721 | xargs grep 'access goods 9235 ' | gawk -F ' ' '{a[$6]}END{for(i in a) print i}' >/var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.9342168703806003.log
 gawk '{if(ARGIND==1){a[$0]}else{if($0 in a) delete a[$0]}}END{for (i in a) print i}' /var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.34378951503811495.log /var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.9342168703806003.log > /var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.9578616973272448.log
 touch -t 201707171951 /var/log/tomcat/wx_tongji_search/tp20170719203249/start_13790;
 touch -t 201707191951 /var/log/tomcat/wx_tongji_search/tp20170719203249/end_50495;
 find /var/log/tomcat/wx_tongji -name '*.log' -type f -newer  /var/log/tomcat/wx_tongji_search/tp20170719203249/start_13790 ! -newer /var/log/tomcat/wx_tongji_search/tp20170719203249/end_50495 | xargs grep 'access goods 9699 ' | gawk -F ' ' '{a[$6]}END{for(i in a) print i}' >/var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.027657177982776227.log
 gawk  '{a[$1]+=1}END{for(i in a)  print i}' /var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.9578616973272448.log /var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.027657177982776227.log >/var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.6688393061188297.log
 gawk '{print $0," </br>"}' /var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.6688393061188297.log > /var/log/tomcat/wx_tongji_search/tp20170719203249/201707192032490.6688393061188297.html
 wc -l /var/log/tomcat/wx_tongji_search/tp20170719203249/*.log |gawk '{print $2,"=",$1}'> /var/log/tomcat/wx_tongji_search/tp20170719203249/result.properties

touch 根据时间生成 开始和结束文件

find -newer 对比开始 结束文件 来定位时间段内的日志文件

grep  来检索需要的内容

gawk 来格式化输出结果

gawk -F ' ' '{a[$6]}END{for(i in a) print i}'
a[$6] 提取第 6 段 内容 放入数组内,END 之后再遍历数组 输出结果,达到去重的目的

gawk '{if(ARGIND==1){a[$0]}else{if($0 in a) delete a[$0]}}END{for (i in a) print i}' a.log b.log

ARGIND==1 用来判断当前行是否来自第一个文件:如果是,则放入数组 a 内;如果不是,并且该行在数组内存在,则从数组内删除。最终达到的目的是:从 A 文件中排除存在于 B 文件内的数据。

 gawk  '{a[$1]+=1}END{for(i in a) if(a[i]=="+fileNames.size()+") print i}' 

 gawk  '{a[$1]+=1}END{for(i in a) if(a[i]==2) print i}' a.log b.log

a[$1]+=1 遍历数据 放入数组内,并且存在重复则数量+1  

for(i in a) 遍历结果, if(a[i]==2) 如果数量==2 即== 要处理的文件的数量,则表示在2个文件内都存在,则输出内容,达到目的 获取2个文件重叠的内容


最后 将结果文件,复制一份生成html文件,加入</br> 作用于 html显示 换行


最后 wc -l 统计每个文件的条目数, gawk '{print $2,"=",$1}'   格式化输出变成 文件名称=数量 的properties 文件,用于解析。


实际存储文件,关联 nginx 目录,可以通过 http://a.com/***/  web来访问。

最终反馈的结果到前台页面,html 展示,log 用于下载。






评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值