一.项目目的
对用户访问网站行为进行分析,获得网站访问量基本数据,对活跃用户进行实时统计分析,从中发现用户访问网站的规律。
二.项目流程
三.项目操作
事先准备好环境
1.start-all.sh
2. ./zkmanager start
3. ./start-kafka
4. 启动Redis服务(我的Redis安装在hdp-2上),在Redis的bin目录下执行:
./redis-server ../redis.conf
1.将gmall-logger-0.0.1-SNAPSHOT.jar上传到linux集群
三台机器上都要启动该jar包
启动java jar包时可以在启动命令后更改端口
java -jar gmall-logger-0.0.1-SNAPSHOT.jar --server.port=8080(可优化)
2.修改nginx的配置文件
#user nobody;
# Number of worker processes (this config sets it to 1).
# nginx only recognizes '#' as a comment marker; a line starting with '//'
# is parsed as a directive and makes the configuration fail to load.
worker_processes 1;
#error_log logs/error.log;
#error_log logs/error.log notice;
#error_log logs/error.log info;
#pid logs/nginx.pid;
events {
    # Maximum number of simultaneous connections per worker process.
    worker_connections 1024;
}
http {
    include mime.types;
    default_type application/octet-stream;

    # Access-log line emitted as a single JSON object.
    # The last field must not be followed by a comma, otherwise every log
    # line is invalid JSON and downstream parsers reject it.
    # NOTE(review): on nginx >= 1.11.8 consider `log_format main escape=json ...`
    # so that quotes inside $request / $http_referer are escaped - confirm version.
    log_format main '{"time":"$time_iso8601", '
                    '"ip":"$remote_addr", '
                    '"referer":"$http_referer", '
                    '"request":"$request", '
                    '"status":"$status"}';
    #access_log logs/access.log main;

    sendfile on;
    #tcp_nopush on;
    #keepalive_timeout 0;
    keepalive_timeout 65;
    #gzip on;

    # Group of gmall-logger instances that requests are balanced across
    # (equal weights on port 8080 of each host).
    upstream logserver {
        server hdp-1:8080 weight=1;
        server hdp-2:8080 weight=1;
        server hdp-3:8080 weight=1;
        # server hdp-2:8889 weight=2 max_fails=2 fail_timeout=30s;
        # server hdp-3:8889 weight=1 max_fails=2 fail_timeout=30s;
        # server hdp-4:8889 weight=1 max_fails=2 fail_timeout=30s;
    }

    server {
        listen 80;
        server_name hdp-1;
        #charset koi8-r;

        # Write access logs for this virtual host using the JSON "main" format.
        access_log logs/frame.access.log main;

        location / {
            #root html;
            #index index.html index.htm;
            # proxy_pass http://frame-tomcat;
            root html;
            index index.html index.htm;
            # Forward requests for this location to the logserver upstream group.
            proxy_pass http://logserver;
            proxy_connect_timeout 10;
        }

        #error_page 404 /404.html;
        # redirect server error pages to the static page /50x.html
        #
        error_page 500 502 503 504 /50x.html;
        location = /50x.html {
            root html;
        }

        # proxy the PHP scripts to Apache listening on 127.0.0.1:80
        #
        #location ~ \.php$ {
        # proxy_pass http://127.0.0.1;
        #}

        # pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
        #
        #location ~ \.php$ {
        # root html;
        # fastcgi_pass 127.0.0.1:9000;
        # fastcgi_index index.php;
        # fastcgi_param SCRIPT_FILENAME /scripts$fastcgi_script_name;
        # include fastcgi_params;
        #}

        # deny access to .htaccess files, if Apache's document root
        # concurs with nginx's one
        #
        #location ~ /\.ht {
        # deny all;
        #}
    }

    # another virtual host using mix of IP-, name-, and port-based configuration
    #
    #server {
    # listen 8000;
    # listen somename:8080;
    # server_name somename alias another.alias;
    # location / {
    # root html;
    # index index.html index.htm;
    # }
    #}

    # HTTPS server
    #
    #server {
    # listen 443;
    # server_name localhost;
    # ssl on;
    # ssl_certificate cert.pem;
    # ssl_certificate_key cert.key;
    # ssl_session_timeout 5m;
    # ssl_protocols SSLv2 SSLv3 TLSv1;
    # ssl_ciphers HIGH:!aNULL:!MD5;
    # ssl_prefer_server_ciphers on;
    # location / {
    # root html;
    # index index.html index.htm;
    # }
    #}
}
3.启动nginx
在hdp-1上执行:cd /usr/local/nginx/sbin && ./nginx
4.启动Elasticsearch
三台机器都需要先切换到zpark用户(非root用户),再启动Elasticsearch
su zpark
cd apps/elasticsearch/bin
./elasticsearch
5.启动Kibana
在hdp-1中 cd apps/kibana/bin
./kibana
在浏览器中访问Kibana可视化页面:http://192.168.182.131:5601/app/kibana#/home?_g=()
需要事先在Kibana中执行以下请求,创建gmall_dau索引及其映射
PUT gmall_dau
{
  "mappings": {
    "_doc": {
      "properties": {
        "mid": {
          "type": "keyword"
        },
        "uid": {
          "type": "keyword"
        },
        "area": {
          "type": "keyword"
        },
        "os": {
          "type": "keyword"
        },
        "ch": {
          "type": "keyword"
        },
        "vs": {
          "type": "keyword"
        },
        "logDate": {
          "type": "keyword"
        },
        "logHour": {
          "type": "keyword"
        },
        "logHourMinute": {
          "type": "keyword"
        },
        "ts": {
          "type": "long"
        }
      }
    }
  }
}
查询指定日期中每小时的活跃用户数。注意:terms聚合的"size"需要设置为24(一天共24个小时),若不设置则默认只返回10个分组。
GET gmall_dau/_search
{
  "query": {
    "bool": {
      "filter": {
        "term": {
          "logDate": "2019-12-10"
        }
      }
    }
  },
  "aggs": {
    "groupby_logHour": {
      "terms": {
        "field": "logHour",
        "size": 24
      }
    }
  }
}