用户访问Web时的信息日志
一、分析
为了对访问网站的用户做分析,这里需要获取用户的用户名、用户类型、IP、IP归属地、运营商、浏览器User-Agent和访问时间。要获取用户访问Web时的这些信息,需要从页面请求request入手。这里以主页作为切入点,通过DEBUG分析主页的request可以看到:request.META里面有访问的基本信息,request.user里面包含了用户的基本信息。
当用户访问news/index页面时,从request.META中获取所需的访问信息:IP和User-Agent,由此可以得到用户的网络IP地址,并判断请求是否来自爬虫。
{
...
'REMOTE_ADDR': '183.220.78.207', # 访问ip
...
'HTTP_USER_AGENT': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36', # User-Agent
...
}
从request.user对象里面获取到用户信息:图中标识的信息就是需要的
用户访问信息保存到一个单独的表中进行分析。
然后在后台站点中将访问信息展示出来,后续做数据分析。
二、数据库设计
news/models.py
from django.db import models
from utils.models import models as _models
from utils.get_os.get_os import GetOSInfo
class UserLoginInfo(_models.BaseModel):
    """
    Log record describing one visit/login of a user.

    Fields:
        username         user name
        user_type        user type label
        ip               IP address
        ip_address       location text for the IP
        user_agent       raw User-Agent header
        last_login_time  login time
    """
    username = models.CharField(max_length=18, verbose_name="用户", help_text="用户")
    user_type = models.CharField(max_length=32, verbose_name="用户类型", help_text="用户类型")
    # GenericIPAddressField always forces its own max_length (39, enough for
    # IPv6), so the former max_length=15 argument had no effect and was misleading.
    ip = models.GenericIPAddressField(verbose_name="IP", help_text="IP")
    ip_address = models.CharField(max_length=32, verbose_name="IP地址", help_text="IP地址")
    # Real User-Agent strings are frequently longer than 128 characters (in-app
    # mobile browsers easily reach ~200), which makes inserts fail or truncate
    # depending on the database. NOTE: widening this column needs a migration.
    user_agent = models.CharField(max_length=512, verbose_name="User-Agent", help_text="User-Agent")
    last_login_time = models.DateTimeField(auto_now=False, auto_now_add=False, verbose_name="登录时间", help_text="登录时间")

    class Meta:
        # NOTE(review): "update_time" is not declared here; presumably it is
        # inherited from BaseModel - confirm.
        ordering = ["-update_time", "-id"]
        db_table = "tb_user_login_info"
        verbose_name = "用户登录信息"
        verbose_name_plural = verbose_name

    def get_os_info(self):
        """
        Return the operating-system and browser family parsed from ``user_agent``.

        :return: dict with the keys ``os_name`` and ``browser_name``
        """
        info_obj = GetOSInfo(self.user_agent)
        return {
            "os_name": info_obj.get_os()["family"],
            "browser_name": info_obj.get_browser()["family"],
        }

    def __str__(self):
        return "用户登录信息:{}:{}".format(self.username, self.ip_address)
GetOSInfo是对GitHub上user-agents库(python-user-agents)解析方法的封装,用来获取User-Agent中的设备信息、浏览器信息和操作系统信息。但是经过测试,设备信息只能获取到移动端的,PC端的设备信息获取不了。
使用user_agents之前需要先安装该包及其依赖:
pip install pyyaml ua-parser user-agents
utils/get_os/get_os.py
# -*- coding: utf-8 -*-
"""
@Time : 2020/3/26 11:29
@Author : 半纸梁
@File : get_os.py
"""
from user_agents import parse
class GetOSInfo:
    """Expose device, browser and OS details parsed from a User-Agent string."""

    def __init__(self, ua_string):
        self.user_agent = ua_string

    def get_device(self):
        """
        Return the device attributes of the visitor.

        Author's test note: only mobile devices are recognised; PC devices
        are not.
        """
        device = parse(self.user_agent).device
        return {
            "family": device.family,  # device name
            "brand": device.brand,    # device vendor
            "model": device.model,    # device model
        }

    def get_browser(self):
        """Return the browser attributes of the visitor."""
        browser = parse(self.user_agent).browser
        return {
            "family": browser.family,                  # browser type
            "version": browser.version,                # version
            "version_string": browser.version_string,  # version as a string
        }

    def get_os(self):
        """Return the operating-system attributes of the visitor."""
        operating_system = parse(self.user_agent).os
        return {
            "family": operating_system.family,                  # system name
            "version": operating_system.version,                # system version
            "version_string": operating_system.version_string,  # version as a string
        }
if __name__ == '__main__':
    # Manual smoke test: run each GetOSInfo accessor on a sample mobile
    # User-Agent. The backslash continuations below are inside the string
    # literal, so the continuation lines must stay at column 0 to keep the
    # value unchanged.
    """测试信息"""
    ua_string = "Mozilla/5.0 (Linux; Android 6.0.1; OPPO A57 Build/MMB29M; wv) AppleWebKit/537.36 \
(KHTML, like Gecko) Version/4.0 Chrome/63.0.3239.83 Mobile Safari/537.36 T7/10.13 baiduboxapp/10.\
13.0.10 (Baidu; P1 6.0.1)"
    info_obj = GetOSInfo(ua_string)
    device_data = info_obj.get_device()
    browser_data = info_obj.get_browser()
    os_data = info_obj.get_os()
    # Uncomment to inspect the parsed results.
    # print(device_data)
    # print(browser_data)
    # print(os_data)
三、视图中保存用户登录信息
当用户访问news/index页面时,记录下该用户的登录信息;如果用户没有退出,再次访问该页面时不再重复记录登录信息(这里处理得不好)。
news/views.py
修改前面的news/views.py中IndexView视图的代码,如下
from requests import ReadTimeout
from django.views import View
from django.shortcuts import render
from django.http import Http404
from utils.IPquery import IPquery
from news import models as _models
from utils.user_config import user_config
class IndexView(View):
    """
    Index page view.

    Renders ``news/index.html`` (tags, hot articles, click ranking) and stores
    one ``UserLoginInfo`` row describing the visitor, de-duplicated through a
    database lookup.
    """

    @staticmethod
    def _lookup_ip_address(ip):
        """
        Resolve ``ip`` to a location string.

        Tries the pconline lookup first and the Baidu Map lookup second.
        Returns "" when both fail, so that a broken third-party service can
        never take the index page down.
        """
        from requests import RequestException

        # Network errors (timeouts included) plus malformed or unexpected
        # payloads (JSON decoding errors are ValueError, missing keys KeyError).
        lookup_errors = (RequestException, ValueError, KeyError)
        try:
            return IPquery.get_ip_sb_address(ip)
        except lookup_errors:
            try:
                return IPquery.get_ip_bd_address(user_config.BD_AK, ip)
            except lookup_errors:
                return ""

    def get(self, request):
        from django.utils import timezone

        # Anti-crawler check: refuse requests without a User-Agent before any
        # database or network work is done. Http404 is an exception and has to
        # be raised; returning it is not a valid response.
        user_agent = request.META.get("HTTP_USER_AGENT")
        if not user_agent:
            raise Http404("Page Not Found!")

        # 1. Article tags
        tag_list = _models.Tags.objects.only("name", "id").filter(is_delete=False)
        # 2. Hot articles and click ranking
        hot_article_list = (
            _models.HotArticle.objects.select_related("article")
            .only("id", "article__title", "article__image_url")
            .filter(is_delete=False)
            .order_by("priority", "-update_time", "-id")
        )
        article_rank = _models.Articles.objects.only("title", "clicks").filter(is_delete=False).order_by("-clicks")[:5]

        # 3. Visitor identity
        username = request.user.username
        is_anonymous = request.user.is_anonymous
        is_staff = request.user.is_staff
        is_superuser = request.user.is_superuser
        if is_anonymous:
            user_type = "游客"
        elif is_superuser:
            user_type = "超级管理员"
        elif is_staff:
            user_type = "管理员"
        else:
            user_type = "普通用户"

        # AnonymousUser has no ``last_login`` attribute, and it may be None for
        # a real user; fall back to the time of this visit in both cases.
        last_login_time = getattr(request.user, "last_login", None)
        has_login_time = last_login_time is not None
        if not has_login_time:
            last_login_time = timezone.now()

        # 4. Client IP. X-Forwarded-For may hold "client, proxy1, proxy2";
        # only the first entry is the client. NOTE: the header is client
        # controlled unless it is set by a trusted reverse proxy.
        forwarded_for = request.META.get("HTTP_X_FORWARDED_FOR")
        if forwarded_for:
            ip = forwarded_for.split(",")[0].strip()
        else:
            ip = request.META.get("REMOTE_ADDR")

        # 5. Location / carrier of the IP
        ip_address = self._lookup_ip_address(ip)

        kwargs = {
            "username": username,
            "user_type": user_type,
            "ip": ip,
            "ip_address": ip_address,
            "user_agent": user_agent,
            "last_login_time": last_login_time,
        }

        # 6. Store the visit once. Without a real login time (guests) the
        # record is de-duplicated on user name and IP only.
        lookup = {"username": username, "ip": ip}
        if has_login_time:
            lookup["last_login_time"] = last_login_time
        user_info = _models.UserLoginInfo.objects.only("username", "ip", "last_login_time").filter(**lookup).first()
        if not user_info:
            _models.UserLoginInfo.objects.create(**kwargs)

        # NOTE(review): locals() hands every local name to the template; the
        # variable names above are kept unchanged for that reason.
        return render(request, "news/index.html", locals())
上面的做法是从数据库中查询username、ip、last_login_time来判断同一个用户是否重复访问同一个页面,每次访问都要操作数据库,效率会很低。下面是修改后的版本:将ip存放在session中,用来判断用户是否重复访问同一个页面,从而只记录一次信息,具体如下:
class IndexView(View):
    """
    Index page view.

    Renders ``news/index.html`` (tags, hot articles, click ranking) and stores
    one ``UserLoginInfo`` row per visitor, using the IP remembered in the
    session to avoid recording the same visitor repeatedly.
    """

    @staticmethod
    def _lookup_ip_address(ip):
        """
        Resolve ``ip`` to a location string.

        Tries the pconline lookup first and the Baidu Map lookup second.
        Returns "" when both fail, so that a broken third-party service can
        never take the index page down.
        """
        from requests import RequestException

        # Network errors (timeouts included) plus malformed or unexpected
        # payloads (JSON decoding errors are ValueError, missing keys KeyError).
        lookup_errors = (RequestException, ValueError, KeyError)
        try:
            return IPquery.get_ip_sb_address(ip)
        except lookup_errors:
            try:
                return IPquery.get_ip_bd_address(user_config.BD_AK, ip)
            except lookup_errors:
                return ""

    def get(self, request):
        from django.utils import timezone

        # Anti-crawler check: refuse requests without a User-Agent before any
        # database or network work is done. Http404 is an exception and has to
        # be raised; returning it is not a valid response.
        user_agent = request.META.get("HTTP_USER_AGENT")
        if not user_agent:
            raise Http404("Page Not Found!")

        # IP stored in the session by an earlier visit ("" when absent)
        session_ip = request.session.get("ip", "")

        # 1. Article tags
        tag_list = _models.Tags.objects.only("name", "id").filter(is_delete=False)
        # 2. Hot articles and click ranking
        hot_article_list = (
            _models.HotArticle.objects.select_related("article")
            .only("id", "article__title", "article__image_url")
            .filter(is_delete=False)
            .order_by("priority", "-update_time", "-id")
        )
        article_rank = _models.Articles.objects.only("title", "clicks").filter(is_delete=False).order_by("-clicks")[:5]

        # 3. Visitor identity
        username = request.user.username
        is_anonymous = request.user.is_anonymous
        is_staff = request.user.is_staff
        is_superuser = request.user.is_superuser
        if is_anonymous:
            user_type = "游客"
        elif is_superuser:
            user_type = "超级管理员"
        elif is_staff:
            user_type = "管理员"
        else:
            user_type = "普通用户"

        # AnonymousUser has no ``last_login`` attribute, and it may be None for
        # a real user; fall back to the time of this visit in both cases.
        last_login_time = getattr(request.user, "last_login", None)
        if last_login_time is None:
            last_login_time = timezone.now()

        # 4. Client IP. X-Forwarded-For may hold "client, proxy1, proxy2";
        # only the first entry is the client. NOTE: the header is client
        # controlled unless it is set by a trusted reverse proxy.
        forwarded_for = request.META.get("HTTP_X_FORWARDED_FOR")
        if forwarded_for:
            ip = forwarded_for.split(",")[0].strip()
        else:
            ip = request.META.get("REMOTE_ADDR")

        # 5. Record the visit only when the session has no IP yet or the IP
        # changed. The location lookup is an outbound HTTP request, so it is
        # performed only when a row is actually written.
        if not session_ip or ip != session_ip:
            ip_address = self._lookup_ip_address(ip)
            kwargs = {
                "username": username,
                "user_type": user_type,
                "ip": ip,
                "ip_address": ip_address,
                "user_agent": user_agent,
                "last_login_time": last_login_time,
            }
            request.session["ip"] = ip
            _models.UserLoginInfo.objects.create(**kwargs)

        # NOTE(review): locals() hands every local name to the template;
        # ``ip_address`` and ``kwargs`` now exist only when a row was written.
        return render(request, "news/index.html", locals())
IP归属地查询这里封装了IPIP归属地查询、太平洋IP归属地查询和百度地图IP归属地查询三种方式,但是
IPIP归属地查询很容易超时,因此视图中没有调用它来处理IP归属地查询。
utils/IPquery/IPquery.py
# -*- coding: utf-8 -*-
"""
@Time : 2020/3/25 10:34
@Author : 半纸梁
@File : test.py
"""
import json
import requests
def get_ip_address(ip):
    """
    Look up the location of an IP through the IPIP free API.

    :param ip: IP address to look up
    :return: tuple ``(ip_address, supplier)``: the concatenation of items 1
        and 2 of the JSON list returned by the service (location) and its
        last item (carrier)
    """
    # The URL literal used to start with a stray space.
    url = "http://freeapi.ipip.net/{}".format(ip)
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/80.0.3987.132 Safari/537.36"
    }
    # Short timeout: the lookup must not hold up the page that triggers it.
    response = requests.get(url, headers=headers, timeout=0.3)
    data = json.loads(response.text)
    ip_address = data[1] + data[2]  # location
    supplier = data[-1]  # carrier
    return ip_address, supplier
def get_ip_sb_address(ip):
    """
    Look up the location of an IP through the pconline (太平洋) whois service.

    :param ip: IP address to look up
    :return: value of the ``addr`` key of the JSON response
    """
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/80.0.3987.132 Safari/537.36"
    }
    response = requests.get(
        "http://whois.pconline.com.cn/ipJson.jsp?ip={}&json=true".format(ip),
        headers=request_headers,
        timeout=0.3,
    )
    payload = json.loads(response.text)
    return payload["addr"]
def get_ip_bd_address(ak, ip):
    """
    Look up the location of an IP through the Baidu Map IP-location API.

    :param ak: Baidu Map access key
    :param ip: IP address to look up
    :return: value of ``content.address`` in the JSON response
    """
    url = "http://api.map.baidu.com/location/ip"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/80.0.3987.132 Safari/537.36"
    }
    # ``params`` URL-encodes the query values instead of pasting them in raw.
    response = requests.get(url, params={"ak": ak, "ip": ip}, headers=headers, timeout=0.3)
    # The JSON parser already decodes \uXXXX escapes. Decoding the body with
    # "unicode_escape" first (as before) can corrupt the document: it also
    # unescapes quotes/newlines and mis-decodes any non-ASCII bytes.
    data = response.json()
    return data["content"]["address"]
if __name__ == '__main__':
    # Manual check of the three lookup functions; each call performs a real
    # HTTP request.
    """测试ip归属地查询"""
    ak = "测试ak"  # placeholder value - replace with a real Baidu Map AK before running
    ip = "测试ip"  # placeholder value - replace with a real IP before running
    ip_address, supplier = get_ip_address(ip)
    get_ip_sb_address(ip)
    get_ip_bd_address(ak, ip)
四、后台站点中显示用户登录信息
1. urls.py
from django.urls import path
from admin import views
# URL namespace of this app; routes are reversed as "admin:<name>".
app_name = "admin"
urlpatterns = [
    # Login-log list page.
    path("login_log/", views.LoginLogView.as_view(), name='login_log'),
    # Same view class, with the id of one log record captured from the URL.
    path("login_log/edit/<int:info_id>/", views.LoginLogView.as_view(), name='login_log_edit'),
]
2. views.py
# 用户访问日志
class LoginLogView(View):
    """
    Admin view for the user login log.

    route: /admin/login_log                       GET     filtered, paginated list page
    route: /admin/login_log/edit/<int:info_id>/   DELETE  soft-delete one record
    """

    # Accepted spellings of the start_time / end_time query parameters: the
    # datepicker format used by the page script (yyyy/mm/dd) and the compact
    # form this view used to echo back.
    DATE_INPUT_FORMATS = ("%Y/%m/%d", "%Y%m%d")
    # Format written back into the form so the datepicker can read it again.
    DATE_OUTPUT_FORMAT = "%Y/%m/%d"

    @classmethod
    def _parse_date(cls, raw_value):
        """Return ``raw_value`` parsed as a datetime, or None when empty/invalid."""
        text = (raw_value or "").strip()
        for date_format in cls.DATE_INPUT_FORMATS:
            try:
                return datetime.strptime(text, date_format)
            except ValueError:
                continue
        return None

    def get(self, request):
        """Render the login-log page filtered by date range and user name."""
        from datetime import timedelta

        # Soft-deleted rows are always hidden.
        user_login_info = models.UserLoginInfo.objects.only(
            "username", "user_type", "ip", "ip_address", "user_agent", "last_login_time"
        ).filter(is_delete=False)

        # Date range. Both bounds are optional and inclusive: the end bound is
        # parsed as midnight, so one day is added to cover the whole end day.
        start_time = self._parse_date(request.GET.get("start_time", ""))
        end_time = self._parse_date(request.GET.get("end_time", ""))
        if start_time:
            user_login_info = user_login_info.filter(last_login_time__gte=start_time)
        if end_time:
            user_login_info = user_login_info.filter(last_login_time__lt=end_time + timedelta(days=1))

        # User-name filter (case-insensitive substring match).
        username = request.GET.get("username", "").strip()
        if username:
            user_login_info = user_login_info.filter(username__icontains=username)

        # An empty result is rendered as an empty table; the previous JSON
        # "parameter error" response for an HTML page has been dropped.
        try:
            page_num = int(request.GET.get("page", 1))
        except (TypeError, ValueError) as e:
            logger.info("页码格式错误:{}".format(e))
            page_num = 1
        page_obj = Paginator(user_login_info, contains.PER_PAGE_NUMBER)
        try:
            login_info = page_obj.page(page_num)
        except EmptyPage:
            # Out-of-range page numbers fall back to the last page.
            login_info = page_obj.page(page_obj.num_pages)
        pages_data = get_page_data(page_obj, login_info)

        start_time = start_time.strftime(self.DATE_OUTPUT_FORMAT) if start_time else ""
        end_time = end_time.strftime(self.DATE_OUTPUT_FORMAT) if end_time else ""
        data = {
            'login_info': login_info,
            'paginator': page_obj,
            'start_time': start_time,
            'end_time': end_time,
            'username': username,
            # Current filters as a query string, appended to pagination links.
            'other_param': urlencode({
                'start_time': start_time,
                'end_time': end_time,
                'username': username
            })
        }
        data.update(pages_data)
        return render(request, 'admin/login_log/login_log.html', context=data)

    def delete(self, request, info_id):
        """Soft-delete the log record ``info_id`` by setting ``is_delete``."""
        info = models.UserLoginInfo.objects.only("id").filter(id=info_id).first()
        if not info:
            return to_json_data(errno=Code.PARAMERR, errmsg=error_map[Code.PARAMERR])
        info.is_delete = True
        info.save(update_fields=["is_delete"])
        return to_json_data(errmsg="成功删除登录日志信息")
3. js
$(function () {
  // Shared options for both date pickers.
  const datepickerOptions = {
    autoclose: true,                  // close after a date is picked
    format: 'yyyy/mm/dd',             // date format
    language: 'zh-CN',                // Chinese locale
    showButtonPanel: true,            // nicer styling
    todayHighlight: true,             // highlight today
    calendarWeeks: true,              // show week numbers left of each week row
    clearBtn: true,                   // show the clear button
    startDate: new Date(1900, 10, 1), // months are 0-11; earliest selectable date
    endDate: new Date(),              // today
  };
  $("input[name=start_time]").datepicker(datepickerOptions);
  $("input[name=end_time]").datepicker(datepickerOptions);

  // Show the server-provided message in a short error dialog.
  function showDeleteError(errmsg) {
    swal({
      title: errmsg,
      type: "error",
      timer: 1000,
      showCancelButton: false,
      showConfirmButton: false,
    })
  }

  // Send the DELETE request and drop the table row when the server confirms.
  function deleteLoginLog(infoId, button) {
    $.ajax({
      url: "/admin/login_log/edit/" + infoId + "/", // the trailing slash is required
      type: "DELETE",
      dataType: "json",
    })
      .done(function (res) {
        if (res.errno === "200") {
          message.showSuccess("登录日志删除成功");
          $(button).parents('tr').remove();
        } else {
          showDeleteError(res.errmsg);
        }
      })
      .fail(function () {
        message.showError('服务器超时,请重试!');
      });
  }

  // Delete buttons: ask for confirmation before deleting.
  $(".btn-del").click(function () {
    const button = this;
    const infoId = $(button).data('info-id');
    swal({
      title: "确定删除这条登录日志吗?",
      text: "删除之后,将无法恢复!",
      type: "warning",
      showCancelButton: true,
      confirmButtonColor: "#DD6B55",
      confirmButtonText: "确定删除",
      cancelButtonText: "取消",
      closeOnConfirm: true,
      animation: 'slide-from-top',
    }, function () {
      deleteLoginLog(infoId, button);
    });
  });
});
4. html
{% extends 'admin/base/base.html' %}
{# The "staticfiles" tag library was deprecated in Django 2.1 and removed in 3.0; "static" provides the same static tag. #}
{% load static %}

{% block title %}
    登录日志
{% endblock %}

{% block css %}
    <link rel="stylesheet" href="{% static 'css/admin/news/bootstrap-datepicker.min.css' %}">
{% endblock %}

{% block content_header %}
    登录日志
{% endblock %}
{% block content %}
    <style>
        .ml20 {
            margin-left: 20px;
        }

        .mt20 {
            margin-top: 20px;
        }
    </style>
    <div class="content">
        <div class="container-fluid">
            <div class="box">
                {# Filter form: date range and user name #}
                <div class="box-header" style="margin: 0;">
                    <form action="" class="form-inline">
                        <div class="form-group ml20 mt20">
                            <label for="select-time">时间:</label>
                            <input type="text" class="form-control" placeholder="请选择起始时间" readonly
                                   id="select-time" name="start_time" value="{{ start_time }}">
                            -
                            <input type="text" class="form-control" placeholder="请选择结束时间" readonly
                                   name="end_time" value="{{ end_time }}">
                        </div>
                        <div class="form-group ml20 mt20">
                            <label for="author">用户名:</label>
                            <input type="text" class="form-control" placeholder="请输入用户名" id="author"
                                   name="username" value="{{ username }}">
                        </div>
                        <div class="form-group ml20 mt20">
                            <button class="btn btn-primary">查询</button>
                            {# Clears the filters by loading the list URL without query parameters #}
                            <a href="{% url 'admin:login_log' %}" class="btn btn-info ml20">清除查询</a>
                        </div>
                    </form>
                </div>
                <br>
                <br>
                {# Login-log table #}
                <div class="box-body">
                    <table class="table table-bordered table-hover">
                        <thead>
                        <tr>
                            <th>账号</th>
                            <th>账号类型</th>
                            <th>IP</th>
                            <th>登录地址</th>
                            <th>浏览器名称</th>
                            <th>操作系统</th>
                            <th>登陆时间</th>
                            <th>编辑</th>
                        </tr>
                        </thead>
                        <tbody>
                        {% for one_info in login_info %}
                            <tr>
                                <td>{{ one_info.username }}</td>
                                <td>{{ one_info.user_type }}</td>
                                <td>{{ one_info.ip }}</td>
                                <td>{{ one_info.ip_address }}</td>
                                <td>{{ one_info.get_os_info.browser_name }}</td>
                                <td>{{ one_info.get_os_info.os_name }}</td>
                                <td>{{ one_info.last_login_time }}</td>
                                <td>
                                    <a href="javascript:void (0);" class="btn btn-xs btn-danger btn-del"
                                       data-info-id="{{ one_info.id }}">删除</a>
                                </td>
                            </tr>
                        {% endfor %}
                        </tbody>
                    </table>
                </div>
                <div class="box-footer">
                    <span class="fa-pull-left">第{{ current_page_num }}页/总共{{ total_page_num }}页 共
                        {{ paginator.count }}条</span>
                    <nav class="fa-pull-right" aria-label="Page navigation">
                        {# Pagination; other_param carries the active filters #}
                        <ul class="pagination">
                            {# Previous page: the page object is passed to the template as login_info #}
                            {% if login_info.has_previous %}
                                <li><a class="page-link" href="?page={{ login_info.previous_page_number }}&{{ other_param }}">上一页</a></li>
                            {% else %}
                                <li><a class="page-link" href="#">上一页</a></li>
                            {% endif %}
                            {# Left ellipsis #}
                            {% if left_has_more_page %}
                                <li><a class="page-link" href="?page=1&{{ other_param }}">1</a></li>
                                <li><a class="page-link" href="javascript:void(0);">...</a></li>
                            {% endif %}
                            {# Page numbers left of the current page #}
                            {% for left_page in left_page_range %}
                                <li><a class="page-link" href="?page={{ left_page }}&{{ other_param }}">{{ left_page }}</a></li>
                            {% endfor %}
                            {# Current page #}
                            {% if current_page_num %}
                                <li class="page-item active"><a class="page-link" href="?page={{ current_page_num }}&{{ other_param }}">{{ current_page_num }}</a></li>
                            {% endif %}
                            {# Page numbers right of the current page #}
                            {% for right_page in right_page_range %}
                                <li><a class="page-link" href="?page={{ right_page }}&{{ other_param }}">{{ right_page }}</a></li>
                            {% endfor %}
                            {# Right ellipsis #}
                            {% if right_has_more_page %}
                                <li><a class="page-link" href="javascript:void(0);">...</a></li>
                                <li><a class="page-link" href="?page={{ total_page_num }}&{{ other_param }}">{{ total_page_num }}</a></li>
                            {% endif %}
                            {# Next page #}
                            {% if login_info.has_next %}
                                <li><a class="page-link" href="?page={{ login_info.next_page_number }}&{{ other_param }}">下一页</a></li>
                            {% else %}
                                <li><a class="page-link" href="#">下一页</a></li>
                            {% endif %}
                        </ul>
                    </nav>
                </div>
            </div>
        </div>
    </div>
{% endblock %}
{% block script %}
    {# Datepicker plugin and its zh-CN locale, followed by this page's own script #}
    <script src="{% static 'js/admin/news/bootstrap-datepicker.min.js' %}"></script>
    <script src="{% static 'js/admin/news/bootstrap-datepicker.zh-CN.min.js' %}"></script>
    <script src="{% static 'js/admin/login_info/login_info.js' %}"></script>
{% endblock %}