爬虫-数据存储-mongodb

1、数据库操作

1、查看当前数据库:db

2、查看所有数据库:show dbs

3、切换数据库:use db_name

4、删除当前数据库:db.dropDatabase()

2、集合操作

1、创建集合:1)在插入数据时自动创建;

                        2)db.createCollection('col_name')

                        3)db.createCollection('col_name',{capped:true,size:300}) 创建一个固定大小(size单位为字节)的集合,内容不能修改,主要用于存储日志

2、查看集合:show collections

3、删除集合:db.col_name.drop()

4、插入数据:db.col_name.insert({'name':'tom','age':19}) key可以不加引号,可以放入列表进行批量插入

5、保存数据:db.col_name.save({_id:'id_string','name':'tom'}) 首先会尝试根据_id进行更新,如果_id不存在就进行插入

6、查看数据:db.col_name.find()

                       1)比较运算:db.col_name.find({age:{$ne:18}})

                                              $lt :小于; $lte : 小于等于;

                                              $gt:大于;$gte :大于等于;$ne:不等于

                        2)逻辑运算:and:db.col_name.find({age:{$gte:18},gender:true})   年龄大于等于18的男性

                                               or   :db.col_name.find({$or:[{age:{$gt:18}},{gender:false}]})   年龄大于18或性别为女的人

                        3)范围运算:$in,$nin:判断数据是否在某个数组内

                                              db.col_name.find({age:{$in:[18,28,38]}})

                        4)正则表达式:$regex:pattern

                                              db.col_name.find({name:{$regex:'^黄'}})  查询name以‘黄’开头的数据

                         5)限制查询条数:db.col_name.find({}).limit(n)  n为返回的最大条数

                         6)略过查询条数:db.col_name.find({}).skip(n)  n为跳过的条数,可与limit联合实现翻页效果

                        7)显示部分查询字段:db.col_name.find({},{name:1,age:1})  只显示name和age字段

                        8)排序:db.col_name.find({}).sort({name:1,age:-1})

                       9)统计:db.col_name.find({}).count()

7、去重:db.col_name.distinct('field_name')

8、更新数据:1)全文档更新:db.col_name.update({name:'tom'},{age:45})

                        2)指定键值更新:db.col_name.update({name:'tom'},{$set:{name:'jack'}})  默认只更新第一条

                        3)更新全部:db.col_name.update({name:'tom'},{$set:{name:'jack'}},{multi:true})            

9、删除数据:1)删除全部:db.col_name.remove({name:'tom'})  默认删除所有匹配项

                        2)删除一条:db.col_name.remove({name:'tom'},{justOne:true})   justOne区分大小写      

3、聚合操作

1、$group

#根据gender分组计数
db.sgo.aggregate({$group:{_id:"$gender",counter:{$sum:1}}})
{ "_id" : "女", "counter" : 3 }
{ "_id" : "男", "counter" : 5 }

#根据gender分组统计age总和
> db.sgo.aggregate({$group:{_id:"$gender",counter:{$sum:2},sum_age:{$sum:"$age"}}})
{ "_id" : "女", "counter" : 6, "sum_age" : 75 }
{ "_id" : "男", "counter" : 10, "sum_age" : 123 }

#显示各分组中的name值
db.sgo.aggregate({$group:{_id:"$gender",counter:{$sum:2},sum_age:{$sum:"$age"},name_list:{$push:"$name"}}})
{ "_id" : "女", "counter" : 6, "sum_age" : 75, "name_list" : [ "黄月英", "小乔", "甄姬" ] }
{ "_id" : "男", "counter" : 10, "sum_age" : 123, "name_list" : [ "诸葛亮", "孙权", "孙策", "刘备", "诸葛亮" ] }

2、$match

#匹配age大于20的元素,按gender分组
db.sgo.aggregate({$match:{age:{$gt:20}}},{$group:{_id:"$gender",name_list:{$push:"$name"}}})
{ "_id" : "女", "name_list" : [ "黄月英", "甄姬" ] }
{ "_id" : "男", "name_list" : [ "孙权", "孙策", "刘备", "诸葛亮" ] }

3、$project

#根据前面管道的结果选择显示内容
>db.sgo.aggregate({$match:{age:{$gt:20}}},{$group:{_id:"$gender",name_list:{$push:"$name"},avg_age:{$avg:"$age"}}})
{ "_id" : "女", "name_list" : [ "黄月英", "甄姬" ], "avg_age" : 28.5 }
{ "_id" : "男", "name_list" : [ "孙权", "孙策", "刘备", "诸葛亮" ], "avg_age" : 30.75 }
> db.sgo.aggregate({$match:{age:{$gt:20}}},{$group:{_id:"$gender",name_list:{$push:"$name"},avg_age:{$avg:"$age"}}},{$project:{name_list:1}})
{ "_id" : "女", "name_list" : [ "黄月英", "甄姬" ] }
{ "_id" : "男", "name_list" : [ "孙权", "孙策", "刘备", "诸葛亮" ] }

4、$sort;$limit;$skip

#排序:gender正序,age倒序
db.sgo.aggregate({$sort:{gender:1,age:-1}})
{ "_id" : ObjectId("5fba2b5595960d9e86d959eb"), "name" : "甄姬", "age" : 32, "gender" : "女" }
{ "_id" : ObjectId("5fba2b5595960d9e86d959e9"), "name" : "黄月英", "age" : 25, "gender" : "女" }
{ "_id" : ObjectId("5fba2b5595960d9e86d959ea"), "name" : "小乔", "age" : 18, "gender" : "女" }
{ "_id" : ObjectId("5fba2b5595960d9e86d959e5"), "name" : "孙权", "age" : 45, "gender" : "男" }
{ "_id" : ObjectId("5fba2b5595960d9e86d959e7"), "name" : "刘备", "age" : 35, "gender" : "男" }
{ "_id" : ObjectId("5fba2b5595960d9e86d959e8"), "name" : "诸葛亮", "age" : 22, "gender" : "男" }
{ "_id" : ObjectId("5fba2b5595960d9e86d959e6"), "name" : "孙策", "age" : 21, "gender" : "男" }
{ "_id" : ObjectId("5fba206895960d9e86d959e4"), "name" : "诸葛亮", "gender" : "男" }

#略过1条数据,只显示3条数据
> db.sgo.aggregate({$sort:{gender:1,age:-1}},{$skip:1},{$limit:3})
{ "_id" : ObjectId("5fba2b5595960d9e86d959e9"), "name" : "黄月英", "age" : 25, "gender" : "女" }
{ "_id" : ObjectId("5fba2b5595960d9e86d959ea"), "name" : "小乔", "age" : 18, "gender" : "女" }
{ "_id" : ObjectId("5fba2b5595960d9e86d959e5"), "name" : "孙权", "age" : 45, "gender" : "男" }

5、$unwind 拆分

#根据type类型进行拆分
> db.sgo1.aggregate({$unwind:{path:"$type",preserveNullAndEmptyArrays:true}})
{ "_id" : ObjectId("5fba39ca95960d9e86d959ec"), "name" : "孙权", "age" : 45, "gender" : "男", "type" : "武将" }
{ "_id" : ObjectId("5fba39ca95960d9e86d959ec"), "name" : "孙权", "age" : 45, "gender" : "男", "type" : "吴国" }
{ "_id" : ObjectId("5fba39ca95960d9e86d959ec"), "name" : "孙权", "age" : 45, "gender" : "男", "type" : "君主" }
{ "_id" : ObjectId("5fba39ca95960d9e86d959ed"), "name" : "孙策", "age" : 21, "gender" : "男", "type" : "武将" }
{ "_id" : ObjectId("5fba39ca95960d9e86d959ed"), "name" : "孙策", "age" : 21, "gender" : "男", "type" : "吴国" }
{ "_id" : ObjectId("5fba39ca95960d9e86d959ee"), "name" : "刘备", "age" : 35, "gender" : "男", "type" : "武将" }
{ "_id" : ObjectId("5fba39ca95960d9e86d959ee"), "name" : "刘备", "age" : 35, "gender" : "男", "type" : "蜀国" }
{ "_id" : ObjectId("5fba39ca95960d9e86d959ee"), "name" : "刘备", "age" : 35, "gender" : "男", "type" : "君主" }
{ "_id" : ObjectId("5fba39ca95960d9e86d959ef"), "name" : "诸葛亮", "age" : 22, "gender" : "男", "type" : "谋士" }
{ "_id" : ObjectId("5fba39ca95960d9e86d959ef"), "name" : "诸葛亮", "age" : 22, "gender" : "男", "type" : "蜀国" }
{ "_id" : ObjectId("5fba39ca95960d9e86d959f0"), "name" : "黄月英", "age" : 25, "gender" : "女" }
{ "_id" : ObjectId("5fba39ca95960d9e86d959f1"), "name" : "小乔", "age" : 18, "gender" : "女", "type" : null }
{ "_id" : ObjectId("5fba39ca95960d9e86d959f2"), "name" : "甄姬", "age" : 32, "gender" : "女" }

#可以再和其他聚合管道进行联用分析
> db.sgo1.aggregate({$unwind:{path:"$type",preserveNullAndEmptyArrays:true}},{$group:{_id:"$type",name:{$push:"$name"}}})
{ "_id" : null, "name" : [ "黄月英", "小乔", "甄姬" ] }
{ "_id" : "谋士", "name" : [ "诸葛亮" ] }
{ "_id" : "蜀国", "name" : [ "刘备", "诸葛亮" ] }
{ "_id" : "吴国", "name" : [ "孙权", "孙策" ] }
{ "_id" : "君主", "name" : [ "孙权", "刘备" ] }
{ "_id" : "武将", "name" : [ "孙权", "孙策", "刘备" ] }

4、python操作数据库

1、连接数据库

from pymongo import MongoClient

# Connect to the server. Credentials are passed to the client constructor
# because Database.authenticate() was removed in PyMongo 4; drop the
# username/password/authSource arguments when the server does not require
# authentication. host and port may also be omitted for a local default server.
client = MongoClient(
    host,
    port,
    username='user_name',
    password='pass_word',
    authSource='admin',
)

# Select a collection.
collection = client['db_name']['col_name']

# Insert one document (Collection.insert() was removed in PyMongo 4).
collection.insert_one({"name": "司马懿", "age": 33, "gender": "男"})

# Query: find() returns an iterable cursor.
for doc in collection.find():
    print(doc)

# Update (Collection.update() was removed in PyMongo 4).
# update_many modifies every document matching the filter.
collection.update_many({}, {"$set": {"id": "12313"}})
# upsert=True inserts a new document when nothing matches the filter.
collection.update_one({"name": "公孙瓒11"}, {"$set": {"id": "113"}}, upsert=True)

# Delete
collection.delete_one({"age": 33})  # removes a single matching document
collection.delete_many({"gender": "女"})  # removes every matching document

5、其他

1、索引:1)创建索引:db.col_name.createIndex({field_id:1})  旧版的ensureIndex已弃用并在新版本中移除

                2)获取索引:db.col_name.getIndexes()

                3)删除索引:db.col_name.dropIndexes('index_name')

2、常用表达式:

                     $sum:求和                $avg:求平均值

                     $min:求最小值          $max:求最大值

                    $push:按列表形式输出分组中的数据

3、启动:进入安装目录\bin  mongo

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值