原理:http://my.oschina.net/u/1458120/blog/545224
bitmap包:https://github.com/Doist/bitmapist
要求redis>= 2.6.0
安装:自行下载 bitmapist-master.zip 并解压,然后在解压后的目录中执行 python setup.py install
(包内默认连接本机 redis 的 6379 端口;如果想要修改,可以把下载的包放到自己的项目中,并把 __init__.py 中的 SYSTEMS 改为:
SYSTEMS = { 'default': redis.Redis(host='192.168.1.3', port=6379) }
)
不能运行在 redis 集群模式下;使用时最好先用 keys * 查看已经存过哪些 key,并为它们设置过期时间
事件标记默认会同时记录到天、周、月三个粒度
如果只想标记到天,可以修改 __init__.py 中的 _mark() 函数,把第一行替换为第二行:
# obj_classes = [MonthEvents, WeekEvents, DayEvents]
# obj_classes = [DayEvents]
推荐阅读:
http://www.zhihu.com/question/21581696
(cohort analysis,即同期群分析)感觉 bitmapist 的 cohort 模块讲的就是这种分析
声明:本文的测试大部分来自该开源项目自带的测试,再加上自己的一些理解;读者也可以直接阅读原项目的测试源码
# -*- coding: utf-8 -*-
# from builtins import range
import os
import traceback
import subprocess
import atexit
import socket
import time
import pytest
from bitmapist.cohort import get_dates_data
from bitmapist import setup_redis, delete_all_events
from datetime import datetime, timedelta
from bitmapist import mark_event, unmark_event,\
MonthEvents, WeekEvents, DayEvents, HourEvents,\
BitOpAnd, BitOpOr, get_event_names
from bitmapist.cohort import get_dates_data
##################################################################
# The helpers below start a Redis server; they can be ignored if one is already running.
def redis_server():
    """
    Generator-style fixture making sure a Redis server is reachable.

    Yields ``None`` when something already accepts connections on the
    configured host/port. Otherwise it spawns a server with
    ``start_redis_server``, waits for the socket with ``wait_for_socket``
    and yields the process handle; the spawned process is terminated once
    the generator is resumed, closed, or fails while waiting.

    Calling this function only creates the generator: nothing is checked
    or started until the generator is advanced (e.g. with ``next()``).
    """
    redis_host = '192.168.15.100'
    redis_port = 6379
    if is_socket_open(redis_host, redis_port):
        # A server is already listening; nothing to start or clean up.
        yield None
        return

    # NOTE(review): the server is spawned on this machine while the
    # readiness check targets ``redis_host`` -- this presumably only works
    # when that address belongs to the local machine; confirm.
    proc = start_redis_server(redis_port)
    try:
        wait_for_socket(redis_host, redis_port)
        yield proc
    finally:
        # Runs on normal resume, on generator close, and when the wait
        # above raises, so the spawned server is not left behind.
        proc.terminate()
def setup_redis_for_bitmapist():
    """
    Register the Redis connections used by the examples with bitmapist.

    Two systems are configured through ``setup_redis``: ``'default'`` and
    ``'default_copy'`` (used later as the alternate system for a query).
    """
    setup_redis('default', '192.168.15.100', 6379)
    # The host previously started with a lowercase letter "l" instead of the
    # digit "1" ('l92.168.8.101'), which is not a valid IPv4 address.
    setup_redis('default_copy', '192.168.8.101', 6380)
def start_redis_server(port):
    """
    Helper function starting Redis server

    Spawns ``redis-server`` listening on ``port`` with stdin, stdout and
    stderr attached to the null device, registers an exit hook that stops
    the process when the interpreter exits, and returns the ``Popen``
    handle.

    :param port: TCP port passed to ``redis-server --port``.
    :return: the ``subprocess.Popen`` object of the spawned server.
    """
    # Adjust this path to wherever your own redis-server binary lives.
    command = ['/usr/local/redis/bin/redis-server', '--port', str(port)]

    # The child gets its own copies of these descriptors, so the parent can
    # close its handles as soon as the process has been spawned instead of
    # leaking them for the rest of the run.
    with open(os.devnull, 'r') as devzero, open(os.devnull, 'w') as devnull:
        # close_fds=True keeps the child from inheriting any other open
        # file descriptors of this process.
        proc = subprocess.Popen(command,
                                stdin=devzero, stdout=devnull, stderr=devnull,
                                close_fds=True)

    def _stop_server():
        # Only signal the process while it is still running; signalling an
        # already-reaped pid can fail or hit an unrelated process.
        if proc.poll() is None:
            proc.terminate()

    atexit.register(_stop_server)
    return proc
def is_socket_open(host, port):
    """
    Helper function which tests is the socket open

    Attempts a TCP connection to ``(host, port)`` with a 0.1 second
    timeout.

    :param host: host name or IP address to connect to.
    :param port: TCP port to connect to.
    :return: ``True`` if the connection attempt succeeds, ``False`` otherwise.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.settimeout(0.1)
        return sock.connect_ex((host, port)) == 0
    finally:
        # Always release the probe socket; it was previously leaked.
        sock.close()
def wait_for_socket(host, port, seconds=3):
    """
    Check if socket is up for :param:`seconds` sec, raise an error otherwise

    Polls ``(host, port)`` every 0.1 seconds, each attempt using a 0.1
    second connect timeout, and returns as soon as one attempt succeeds.

    :param host: host name or IP address to connect to.
    :param port: TCP port to connect to.
    :param seconds: polling budget; the number of attempts is
        ``int(seconds / 0.1)``.
    :raises RuntimeError: if no attempt succeeds.
    """
    polling_interval = 0.1
    iterations = int(seconds / polling_interval)
    for _ in range(iterations):
        # Use a fresh socket per attempt: the state of a socket after a
        # failed connect() is unspecified, so retrying on the same one is
        # not portable.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.settimeout(0.1)
            reachable = sock.connect_ex((host, port)) == 0
        finally:
            # Closed on success and on failure, so nothing is leaked.
            sock.close()
        if reachable:
            return
        time.sleep(polling_interval)
    raise RuntimeError('Service at %s:%d is unreachable' % (host, port))
########################################################################################
def base_test():
    """
    Walk through the basic bitmapist API against a live Redis server.

    Covers marking/unmarking events, membership checks on month, week, day
    and hour bitmaps, counting, and combining bitmaps with ``BitOpAnd`` /
    ``BitOpOr`` (including a query against the ``'default_copy'`` system).
    Any error raised along the way is printed with its traceback instead of
    being propagated.

    The ``# expected`` remarks next to the ``print`` calls are the values
    recorded by the original author; they assume an empty database and a
    run date on which "yesterday" falls in the same week and month as today.
    """
    server = None
    try:
        setup_redis_for_bitmapist()
        # redis_server() is a generator: it has to be advanced for the
        # reachability check / server start-up to actually happen.
        server = redis_server()
        next(server)

        # Take a single timestamp and pass it to every call below so that
        # marking and querying cannot straddle an hour/day/month boundary.
        now = datetime.utcnow()
        week = now.isocalendar()[1]

        # mark_event has two important keyword arguments:
        #   track_hourly=True also records hourly activity (by default only
        #   month, week and day are recorded);
        #   now=None means "the current time", otherwise the event is
        #   recorded at the given time.
        mark_event('active', 123, now=now, track_hourly=True)
        unmark_event('active', 124, now=now, track_hourly=True)  # remove the mark

        print(MonthEvents('active', now.year, now.month).has_events_marked())
        # All event names; expected: set(["active"])
        print(set(get_event_names(batch=2)))
        # All event names starting with "b"; expected: set([])
        print(set(get_event_names(prefix='b', batch=2)))

        # Month
        assert 123 in MonthEvents('active', now.year, now.month)
        assert 124 not in MonthEvents('active', now.year, now.month)
        # Week
        assert 123 in WeekEvents('active', now.year, week)
        assert 124 not in WeekEvents('active', now.year, week)
        # Day
        assert 123 in DayEvents('active', now.year, now.month, now.day)
        assert 124 not in DayEvents('active', now.year, now.month, now.day)
        # Hour
        assert 123 in HourEvents('active', now.year, now.month, now.day, now.hour)
        assert 124 not in HourEvents('active', now.year, now.month, now.day, now.hour)
        # Previous hour: computed with timedelta so that hour 0 rolls back to
        # 23:00 of the previous day instead of producing hour -1.
        prev_hour = now - timedelta(hours=1)
        assert 124 not in HourEvents('active', prev_hour.year, prev_hour.month,
                                     prev_hour.day, prev_hour.hour)

        mark_event('active', 124, now=now)

        # Mark user 125 as active, then take the mark back.
        mark_event('active', 125, now=now)
        assert 125 in MonthEvents('active', now.year, now.month)
        unmark_event('active', 125, now=now)
        assert 125 not in MonthEvents('active', now.year, now.month)

        yesterday = now - timedelta(days=1)
        mark_event('active', 126, now=now)
        # Record user 127 on yesterday's month/week/day bitmaps.
        mark_event('active', 127, now=yesterday)

        # Counting active users. An error about a missing BITCOUNT command
        # means the server is older than the required redis >= 2.6.0
        # (check the version with the INFO command).
        print(MonthEvents('active', now.year, now.month).get_count())  # expected: 4
        print(list(WeekEvents('active', now.year, week)))  # expected: [123, 124, 126, 127]
        print(list(MonthEvents('active', now.year, now.month)))  # expected: [123, 124, 126, 127]
        today_events = DayEvents('active', now.year, now.month, now.day)
        print(list(today_events))  # expected: [123, 124, 126]
        print(len(today_events))  # expected: 3
        print(list(DayEvents('active', yesterday.year, yesterday.month,
                             yesterday.day)))  # expected: [127]

        # Step back by whole calendar months (last day of the previous month,
        # then of the month before). A fixed 30/60 day offset does not
        # always land in the previous calendar month.
        last_month = now.replace(day=1) - timedelta(days=1)
        two_months_ago = last_month.replace(day=1) - timedelta(days=1)

        # 123, 127 and 128 were active last month; 127 the month before that.
        mark_event('active', 127, now=two_months_ago)
        mark_event('active', 123, now=last_month)
        mark_event('active', 127, now=last_month)
        mark_event('active', 128, now=last_month)

        # Users active in each of the last three months (intersection).
        active_3_months = BitOpAnd(
            MonthEvents('active', two_months_ago.year, two_months_ago.month),
            MonthEvents('active', last_month.year, last_month.month),
            MonthEvents('active', now.year, now.month)
        )
        print(list(active_3_months))  # expected: [127]
        active_3_months.delete()

        # Same query with a nested operation.
        active_3_months = BitOpAnd(
            MonthEvents('active', two_months_ago.year, two_months_ago.month),
            MonthEvents('active', last_month.year, last_month.month),
            BitOpAnd(
                MonthEvents('active', now.year, now.month))
        )
        print(list(active_3_months))  # expected: [127]

        # Users active in any of the last three months (union).
        print(list(BitOpOr(
            MonthEvents('active', two_months_ago.year, two_months_ago.month),
            MonthEvents('active', last_month.year, last_month.month),
            MonthEvents('active', now.year, now.month))))  # expected: [123, 124, 126, 127, 128]

        # Run a query through the backup Redis system.
        # NOTE(review): the events above were marked on 'default'; whether
        # 'default_copy' holds the same data depends on the deployment.
        active_2_months = BitOpAnd(
            'default_copy',
            MonthEvents('active', last_month.year, last_month.month),
            MonthEvents('active', now.year, now.month)
        )
        print(list(active_2_months))
        active_2_months.delete()

        delete_all_events()  # remove every bitmap created above
    except Exception:
        # Best-effort demo: report the failure but do not propagate it.
        # (KeyboardInterrupt / SystemExit are no longer swallowed.)
        traceback.print_exc()
    finally:
        if server is not None:
            # Resume the generator so its teardown (terminating a server it
            # started) runs; the default avoids StopIteration at the end.
            next(server, None)
def test_bit_operations_complex():
    """
    Combine the results of two bit operations in a third one.

    Users 111 and 222 are marked for ``task1`` and ``task2`` both today and
    tomorrow; the per-day intersections are then intersected with each
    other and the resulting user ids are printed.
    """
    now = datetime.utcnow()
    tom = now + timedelta(days=1)

    # Same marks, in the same order, as writing the eight calls out by hand.
    for user_id in (111, 222):
        for task in ('task1', 'task2'):
            for moment in (now, tom):
                mark_event(task, user_id, now=moment)

    today_task1 = DayEvents('task1', now.year, now.month, now.day)
    today_task2 = DayEvents('task2', now.year, now.month, now.day)
    now_events = BitOpAnd(today_task1, today_task2)

    tomorrow_task1 = DayEvents('task1', tom.year, tom.month, tom.day)
    tomorrow_task2 = DayEvents('task2', tom.year, tom.month, tom.day)
    tom_events = BitOpAnd(tomorrow_task1, tomorrow_task2)

    both_events = BitOpAnd(now_events, tom_events)
    print(list(both_events))
def test_bitop_key_sharing():
    """
    Show what happens when the same bit operation is built twice.

    Two identical ``BitOpAnd`` queries are created over today's ``task1``
    and ``task2`` bitmaps; their representations, ``redis_key`` values and
    lengths are printed before and after deleting the first one.
    """
    today = datetime.utcnow()
    day_args = (today.year, today.month, today.day)

    # Think of task1 as "singing" and task2 as "dancing".
    for user_id in (111, 222):
        for task in ('task1', 'task2'):
            mark_event(task, user_id, now=today)

    ev1_task1 = DayEvents('task1', *day_args)
    ev1_task2 = DayEvents('task2', *day_args)
    # Users who both sang and danced today.
    ev1_both = BitOpAnd(ev1_task1, ev1_task2)

    ev2_task1 = DayEvents('task1', *day_args)
    ev2_task2 = DayEvents('task2', *day_args)
    ev2_both = BitOpAnd(ev2_task1, ev2_task2)

    print('%s %s' % (ev1_both, ev2_both))
    print('%s %s' % (ev1_both.redis_key, ev2_both.redis_key))
    print('%s %s' % (len(ev1_both), len(ev2_both)))  # author's output: 2 2
    ev1_both.delete()
    # Author's output: 0 0 -- per the author, identical queries share the
    # same stored result, so deleting one empties the other as well.
    print('%s %s' % (len(ev1_both), len(ev2_both)))
def test_bit_operations_magic():
    """
    Demonstrate the operator syntax (``&``, ``|``, ``^``, ``~``) on events.

    Starts from an empty event store, marks users 1 and 2 for ``foo`` and
    users 2 and 3 for ``bar``, then prints the result of each combination.
    """
    delete_all_events()
    for event_name, user_id in (('foo', 1), ('foo', 2), ('bar', 2), ('bar', 3)):
        mark_event(event_name, user_id)

    foo = DayEvents('foo')
    bar = DayEvents('bar')

    in_both = foo & bar  # intersection
    print(list(in_both))
    in_either = foo | bar  # union
    print(list(in_either))
    in_exactly_one = foo ^ bar  # exclusive or: ids present in only one of the two
    print(list(in_exactly_one))
    not_foo = ~foo
    print(list(not_foo & bar))
    # Output recorded by the author, in order:
    # [2]
    # [1, 2, 3]
    # [1, 3]
    # [3]
def test_cohort():
    '''Scenario: of the users registered on a given day, how many were active on each of the following days.'''
    today = datetime.utcnow()  # 20160608 in the author's run
    yes = today - timedelta(days=1)
    ago2 = today - timedelta(days=2)  # 20160606 in the author's run
    tomorrow = today + timedelta(days=1)
    after2 = today + timedelta(days=2)

    # (event name, user ids, timestamp) -- applied top to bottom.
    marks = [
        # ---- users registered the day before yesterday ----
        ("regist", (111, 112, 113, 114, 115), ago2),  # registered two days ago
        ("active", (111, 112, 113, 114), ago2),       # active two days ago
        ("active", (111, 112, 113), yes),             # active yesterday
        ("active", (115,), today),                    # active today
        # ---- users registered yesterday ----
        ("regist", (116, 117, 118, 119, 120), yes),   # registered yesterday
        ("active", (116, 117, 118, 119), yes),        # active yesterday
        ("active", (119, 120), today),                # active today
        ("active", (116,), tomorrow),                 # active tomorrow
        # ---- users registered today ----
        ("regist", (121, 122, 123), today),           # registered today
        ("active", (121, 122), today),                # active today
        ("active", (121,), tomorrow),                 # active tomorrow
        ("active", (123,), after2),                   # active the day after tomorrow
    ]
    for event_name, user_ids, moment in marks:
        for user_id in user_ids:
            mark_event(event_name, user_id, now=moment)

    # Author's notes on the get_dates_data arguments:
    #   select1:     the starting condition (registration in this example)
    #   select1b:    an additional condition on select1, e.g. users registered
    #                in China (mark_event("registchina", 123, now=today))
    #   select2:     the filter applied on top of the starting condition
    #   select2b:    same idea as select1b, for select2
    #   time_group:  time granularity: `days`, `weeks`, `months`, `years`
    #   as_precent:  0 outputs the counts left after filtering, 1 percentages
    #   num_results: together with time_group gives the period of interest,
    #                e.g. the last three days / weeks / months / years
    #   num_of_rows: for select1 (or select1 + select1b), how many following
    #                days (weeks / months / years) of select2 (or
    #                select2 + select2b) to output
    # In this example: for the last three days (time_group + num_results =
    # 3 days), take the users registered on each day (select1='regist') and
    # report how many were active (select2='active') on each of three days
    # (num_of_rows=3).
    queries = [('regist', None, 'active', None)]
    for select1, select1b, select2, select2b in queries:
        r = get_dates_data(select1=select1, select1b=select1b,
                           select2=select2, select2b=select2b,
                           time_group='days', as_precent=0,
                           num_results=3, num_of_rows=3)
        print(list(r))
    # Output recorded by the author
    # (columns: date, registered that day, active that day, active on the following days):
    # [[datetime.datetime(2016, 6, 6, 6, 21, 43, 845437), 5, 4, 3, 1, 0.0],
    # [datetime.datetime(2016, 6, 7, 6, 21, 43, 845437), 5, 4, 2, 1, 0.0],
    # [datetime.datetime(2016, 6, 8, 6, 21, 43, 845437), 3, 2, 1, 1, '']]
    # Taking the 6th as an example: 5 users registered on 20160606 and 4 of
    # them were active that day; of those registered on 20160606, 3 were
    # active on 20160607, 1 on 20160608 and 0 on 20160609. Per the author,
    # 0.0 means somebody was active but nobody from that day's registrations,
    # while '' means nobody was active at all.
    # Looking the keys up in redis:
    # :6379> BITCOUNT trackist_regist_2016-6-7
    # 6379> BITCOUNT trackist_regist_W2016-27
    # 6379> BITCOUNT trackist_buyid_regist_2016-6
if __name__ == '__main__':
    # Run the key-sharing and operator demos, wiping all events after each,
    # then finish with the cohort demo.
    for scenario in (test_bitop_key_sharing, test_bit_operations_magic):
        scenario()
        delete_all_events()
    test_cohort()