mongoDB学习笔记五

最新推荐文章于 2024-01-11 19:54:49 发布

retacn

最新推荐文章于 2024-01-11 19:54:49 发布

阅读量310

点赞数

分类专栏： MongoDB

本文链接：https://blog.csdn.net/retacn_yue/article/details/55096524

版权

MongoDB 专栏收录该内容

8 篇文章 0 订阅

订阅专栏

第六章聚合

第四章

一 count

返回集合中的文档数量

> db.users.count()

二 distinct

查询所有给定键的所有不同的值

>db.runCommand({"distinct":"users","key":"username"})

{

"waitedMS" : NumberLong(0),

"values" : [

"andy",

"ann",

"mike",

"retacn yue"

"stats" : {

"n" : 4,

"nscanned" : 4,

"nscannedObjects" :0,

"timems" : 88,

"planSummary" :"DISTINCT_SCAN { username: 1.0 }"

"ok" : 1

}

三 group

在shell中运行

#查询所有数据

> db.prices.find()

{ "_id" :ObjectId("5872e51a2e29db2d68661bd3"), "time" :"10/03/2016 01:05:02", "price" : 4.23, "day" :"2016/10/03" }

{ "_id" : ObjectId("5872e51a2e29db2d68661bd4"),"time" : "10/04/2016 11:05:02", "price" : 4.27,"day" : "2016/10/04" }

{ "_id" :ObjectId("5872e51a2e29db2d68661bd5"), "time" :"10/03/2016 05:05:02", "price" : 4.1, "day" :"2016/10/03" }

{ "_id" :ObjectId("5872e51a2e29db2d68661bd6"), "time" :"10/06/2016 05:05:02", "price" : 4.3, "day" :"2016/10/06" }

{ "_id" :ObjectId("5872e51a2e29db2d68661bd7"), "time" :"10/04/2016 08:05:02", "price" : 4.01, "day" :"2016/10/04" }

#分组查询

> db.runCommand({"group":{

... "ns":"prices",

... "key":"day",

..."initial":{"time":""},

... "$reduce":function(doc,prev){

... if(doc.time>prev.time){

... prev.price=doc.price;

... prev.time=doc.time;

... }}}})

{

"waitedMS" : NumberLong(0),

"retval" : [

{

"time" :"10/06/2016 05:05:02",

"price" : 4.3

}

"count" : NumberLong(5),

"keys" : NumberLong(1),

"ok" : 1

}

#添加分组条件

> db.runCommand({"group":{

"ns":"prices",

"key":"day",

"initial":{"time":""},

"$reduce":function(doc,prev){

if(doc.time>prev.time){

prev.price=doc.price;

prev.time=doc.time;

}

"condition":{"day":{"$gt":"2016/09/30"}}}})

{

"waitedMS" : NumberLong(0),

"retval" : [

{

"time" :"10/06/2016 05:05:02",

"price" : 4.3

}

"count" : NumberLong(5),

"keys" : NumberLong(1),

"ok" : 1

}

使用python进行分组查询,示例代码如下:

from pymongo import MongoClient
import statsmodels.api as sm
import json
import pandas as pd
import time
from datetime import datetime, date, time, timezone, timedelta
import datetime as dt
import time

# 创建数据库连接
client = MongoClient()
db = client.test

# 添加记录
# db.prices.insert({"day":"2016/10/03","time":"10/03/2016 01:05:02","price":4.23})
# db.prices.insert({"day":"2016/10/04","time":"10/04/2016 11:05:02","price":4.27})
# db.prices.insert({"day":"2016/10/03","time":"10/03/2016 05:05:02","price":4.10})
# db.prices.insert({"day":"2016/10/06","time":"10/06/2016 05:05:02","price":4.30})
# db.prices.insert({"day":"2016/10/04","time":"10/04/2016 08:05:02","price":4.01})

# 分组查询

func = """
        function(doc, prev){
            if (doc.time > prev.time){
                prev.price = doc.price;
                prev.time = doc.time;
            }
        }
"""
# 函数原型
# def group(self,
#           key, 进行分组的键
#           condition, 设置分组条件
#           initial, 指定reduce中参数
#           reduce,  每一条记录都作为reduce函数的第一个参数执行
#           finalize=None,
#           **kwargs):


gp = db.prices.group({"day": 1}, {"day": {"$gt": "2016/09/30"}}, {"time": ""}, func)
print(gp)

查询结果如下

[{'day': '2016/10/03', 'price': 4.1,'time': '10/03/2016 05:05:02'},

{'day': '2016/10/04', 'price': 4.27,'time': '10/04/2016 11:05:02'},

{'day': '2016/10/06', 'price': 4.3, 'time':'10/06/2016 05:05:02'}]

使用完成器

#添加博客数据

>db.blog.posts.insert({"title":"A blogpost2","content":"...","author":{"name":"andy","email":"andy@sina.com"},"comments":[{"name":"tan","email":"tan

@sina.com","content":"good","votes":2}],"tags":{"java":5,"c++":2},"day":"2017/01/10"})

>db.blog.posts.insert({"title":"A blog post3","content":"...","author":{"name":"ruby","email":"ruby@sina.com"},"comments":[{"name":"san","email":"san

@sina.com","content":"good","votes":2}],"tags":{"c":6,"winter":2,"nosql":15},"day":"2017/01/11"})

WriteResult({ "nInserted" : 1 })

#查询所有数据

> db.blog.posts.find()

{ "_id" :ObjectId("586f21115da1e3bac2c373fe"), "title" : "Ablog post", "content" : "...", "author" : {"name" : "retacn yue", "email" : "zhenhuayue@

sina.com" }, "comments" : [{ "name" : "yue", "email" :"zhenhuayue@126.com", "content" : "good","votes" : 11 }, { "name" : "three","email" : "zhenh

uayue@hotmail.com","content" : "better", "votes" : 8 }, {"name" : "zhenhua", "email" :"zhenhuayue@qq.com", "content" : "best","votes" : 11 } ], "t

ags" : { "nosql" : 4,"python" : 10, "slendding" : 2 }, "day" :"2017/01/09" }

{ "_id" :ObjectId("5873036164a560b0e9430160"), "title" : "Ablog post2", "content" : "...", "author" : {"name" : "andy", "email" : "andy@sina.com"}

, "comments" : [ {"name" : "tan", "email" :"tan@sina.com", "content" : "good","votes" : 2 } ], "tags" : { "java" : 5,"c++" : 2 }, "day" : "2017/01

/10" }

{ "_id" :ObjectId("587303ed64a560b0e9430161"), "title" : "Ablog post3", "content" : "...", "author" : {"name" : "ruby", "email" : "ruby@sina.com"}

, "comments" : [ { "name": "san", "email" : "san@sina.com","content" : "good", "votes" : 2 } ],"tags" : { "c" : 6, "winter" : 2,"nosql" : 15 }, "d

ay" : "2017/01/11" }

#分组

> db.runCommand({"group":{

... "ns":"blog.posts",

..."key":{"tags":true},

..."initial":{"tags":{}},

... "$reduce":function(doc,prev){

... for(i in doc.tags){

... if(doc.tags[i] in prev.tags){

... prev.tags[doc.tags[i]]++;

... }else{

... prev.tags[doc.tags[i]]=1;

... }

... }})

#查询结果:

{

"waitedMS" : NumberLong(0),

"retval" : [

{

"tags" : {

"2" :1,

"4" :1,

"10": 1

}

{

"tags" : {

"2" :1,

"5" :1

}

{

"tags" : {

"2" :1,

"6" :1,

"15": 1

}

"count" : NumberLong(3),

"keys" : NumberLong(3),

"ok" : 1

}

#使用完成器(finalizer)精减后的分组

> db.runCommand({"group":{

... "ns":"blog.posts",

..."key":{"tags":true},

..."initial":{"tags":{}},

... "$reduce":function(doc,prev){

... for(i in doc.tags){

... if(doc.tags[i] in prev.tags){

... prev.tags[doc.tags[i]]++;

... }else{

... prev.tags[doc.tags[i]]=1;

... }

... },

... "finalize":function(prev){

... var mostPopular=0;

... for(i in prev.tags){

... if(prev.tags[i]>mostPopular){

... prev.tag=i;

... mostPopulat=prev.tags[i];

... }

... delete prev.tags

... }

... }})

{

"waitedMS" : NumberLong(0),

"retval" : [

{

"tag" :"10"

{

"tag" :"5"

{

"tag" :"15"

}

"count" : NumberLong(3),

"keys" : NumberLong(3),

"ok" : 1

}

Python代码实现,


from pymongo import MongoClient
import statsmodels.api as sm
import json
import pandas as pd
import time
from datetime import datetime, date, time, timezone, timedelta
import datetime as dt
import time

# 创建数据库连接
client = MongoClient()
db = client.test

func = """
        function(doc, prev){
            if (doc.time > prev.time){
                prev.price = doc.price;
                prev.time = doc.time;
            }
        }
"""
func_none = """function(doc,prev){
        for(i in doc.tags){
            if(doc.tags[i] in prev.tags){
                prev.tags[doc.tags[i]]++;
            }else{
               prev.tags[doc.tags[i]]=1;
            }
        }
}
"""

func_finalize = """function(prev){
   var mostPopular=0;
   for(i in prev.tags){
      if(prev.tags[i]>mostPopular){
         prev.tag=i;
         mostPopulat=prev.tags[i];
      }
   }
   delete prev.tags
}
"""

# 函数原型
# def group(self,
#           key, 进行分组的键
#           condition, 设置分组条件
#           initial, 指定reduce中参数
#           reduce,  每一条记录都作为reduce函数的第一个参数执行
#           finalize=None, 完成器精减数据库到用户的数据
#           **kwargs):
# gp = db.prices.group({"day": 1}, {"day": {"$gt": "2016/09/30"}}, {"time": ""}, func)
# 分组
gp_none = db.blog.posts.group({"tags": True}, {},{"tags": {}}, func_none )
print("gp_none", gp_none)

# 使用完成器(finalizer)精减后的分组
gp_finalizer = db.blog.posts.group({"tags": True},{}, {"tags": {}}, func_none, finalize= func_finalize)
print("gp_finalizer", gp_finalizer)