http://www.cnblogs.com/shanyou/p/3494854.html
在这个网址学习了一下mongo聚合查询
两个用pymongo查询的代码示例
def get_realtime_users(self):
rt = []
try:
#start_time_obj = datetime.strptime(startTime, '%Y-%m-%d')
#end_time_obj = datetime.strptime(endTime, '%Y-%m-%d')
#post = {'userId':userId, 'time':{'$gte':start_time_obj, '$lte':end_time_obj}}
#post['time'] = datetime.strptime(time, '%Y-%m-%d %H:%M:%S')
now = datetime.now()
delta = timedelta(days=7)
time_from = now - delta
post = {'time': {'$gte': time_from}}
doc = {'_id':0}
pageType_dict = dict()
for item in self._pageTypeMapping:
pageType_dict[item['id']] = item['pageType']
cursor = self.mdbBb[g_tableOperationRecord].find(post, doc).sort([('time',pymongo.DESCENDING)])
for item in cursor.batch_size(100):
#print(item['time'], type(item['time']))
#print(item)
item['pageType'] = pageType_dict.get(item['pageType'])
rt.append(item)
except Exception as e:
traceback.print_exc()
logging.error(e.__str__())
return rt
def get_pagetype_rank(self):
rt = []
try:
now = datetime.now()
delta = timedelta(days=7)
time_from = now - delta
doc = {'_id':0, 'pageType':1, 'time':1}
match = {
'time': {'$gte': time_from},
}
group = {
'_id': '$pageType',
'count': {'$sum': 1}
}
sort = {'count': -1}
pageType_dict = dict()
for item in self._pageTypeMapping:
pageType_dict[item['id']] = item['pageType']
#db.department.aggregate({'$group':{'_id':'$name','total_area':{'$sum':'$area'}}})
#cursor = self.mdbBb[g_tableOperationRecord].find(post, doc)
cursor = self.mdbBb[g_tableOperationRecord].aggregate([
{'$project': doc},
{'$match': match},
{'$group': group},
{'$sort': sort},
#{'$limit': 3},
])
#print(pageType_dict)
for item in cursor['result']:
#print(item)
item['_id'] = pageType_dict.get(item['_id'])
rt.append(item)
#print('rt',rt)
except Exception as e:
traceback.print_exc()
logging.error(e.__str__())
return rt
aggregate中 $group复合键写法
'$group':{ _id: {projid: '$remarks.projId', pageid: '$remarks.pageid'}, count: {$sum: 1} }
def get_projectpage_rank(self):
'''项目页面访问排行'''
rt = []
try:
now = datetime.now()
delta = timedelta(days=7)
time_from = now - delta
#doc = {'_id':0, 'pageType':1, 'time':1}
match = {
#'time': {'$gte': time_from},
'remarks.projId': {'$exists': True},
'remarks.pageid': {'$exists': True}
}
group = {
'_id': {'projid': '$remarks.projId', 'pageid': '$remarks.pageid'},
'count': {'$sum': 1}
}
sort = {'count': -1}
cursor = self.mdbBb[g_tableOperationRecord].aggregate([
#{'$project': doc},
{'$match': match},
{'$group': group},
{'$sort': sort},
{'$limit': 50},
])
for item in cursor['result']:
print(item)
rt.append(item)
print('rt',rt)
except Exception as e:
traceback.print_exc()
logging.error(e.__str__())
return rt
注意, $match是对 $project里列出的字段来匹配的,如果project里没有,则match不到
例如以下是错的
project= {'_id':0, 'pageType':1, 'time':1}
match = {
'time':{'$gte':start_time, '$lte':end_time},
'userId': userId,#projectl里没有userId字段
}
这是对的
doc = {'_id':0, 'pageType':1, 'time':1, 'userId':1}
match = {
'time':{'$gte':start_time, '$lte':end_time},
'userId': userId,
}