1. 连接数据库
# 方式一
client = pymongo.MongoClient(host='localhost', port=27017)
# 方式2: URI = "mongodb://root:123456@localhost:27017"
URI = "mongodb://localhost:27017"
client = pymongo.MongoClient(URI)
2. 数据库操作
all_dbs = client.list_database_names() # 查询所有数据库
db = client.testdb # 使用testdb库
client.drop_database("testdb") # 删除数据库
3. 集合操作
db = client.test
colls = db.list_collections() # 查询test库里所有的集合
coll = db.user # 选取user集合
db.drop_collection("user") # 删除user集合
4. 索引操作
client = pymongo.MongoClient(host='localhost', port=27017)
db = client.test
user_coll = db.user
# 创建单列升序索引
sing_asc_idx = user_coll.create_index([("name", pymongo.ASCENDING)])
print(sing_asc_idx) # name_1
# 创建单列降序索引
sing_desc_idx = user_coll.create_index([("age", pymongo.DESCENDING)])
print(sing_desc_idx) # age_-1
# 创建联合索引
union_idx = user_coll.create_index([("name", pymongo.ASCENDING), ("age", pymongo.DESCENDING)])
print(union_idx) # name_1_age_-1
# 创建唯一索引(注意:如果集合上已存在同名的非唯一索引 name_1,需要先用 drop_index("name_1") 删除,否则会因索引选项冲突抛出 OperationFailure)
unique_idx = user_coll.create_index([("name", pymongo.ASCENDING)], unique=True)
print(unique_idx) # name_1
# 删除某个索引
user_coll.drop_index("name_1")
# 删除全部索引
user_coll.drop_indexes()
# 为一个集合同时创建多个索引
uname = pymongo.IndexModel([("name", pymongo.DESCENDING)])
uage = pymongo.IndexModel([("age", pymongo.ASCENDING)])
idx_res = user_coll.create_indexes([uname, uage])
print(idx_res) # ['name_-1', 'age_1']
5. 文档操作
- 增
client = pymongo.MongoClient(host='localhost', port=27017)
user_coll = client.test.user
# 单值插入
doc = {"name": "lihua", "age": 25, "gender": "male"}
res = user_coll.insert_one(doc)
insert_status = res.acknowledged # 插入成功与否:True
insert_id = res.inserted_id # 插入之后的id:64858e451beea93d5224e44e
# 批量插入
docs = [{"name": "zhangli", "age": 28, "gender": "female"},
{"name": "lizhu", "age": 30, "gender": "female"}]
res = user_coll.insert_many(docs)
insert_status = res.acknowledged # True
insert_id = res.inserted_ids # [ObjectId('64858e451beea93d5224e44e'), ObjectId('64858e451beea93d5224e44f')]
- 删
client = pymongo.MongoClient(host='localhost', port=27017)
user_coll = client.test.user
# 单条删除,只删除第一条
del_res = user_coll.delete_one({"name": "zhangsan"})
print(del_res.acknowledged) # True
print(del_res.deleted_count) # 1
# 批量删除
del_res = user_coll.delete_many({"name": "zhangsan"})
print(del_res.acknowledged) # True
print(del_res.deleted_count) # 2
- 改
# 修改单条
update_res = user_coll.update_one(filter={"name": "lisi"}, update={"$set": {"age": 20, "gender": "male"}})
print(update_res.acknowledged) # True
print(update_res.matched_count) # 1
print(update_res.modified_count) # 1
print(update_res.raw_result) # {'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}
print(update_res.upserted_id) # None
# upsert=True: 不存在该条记录的话就插入
update_res = user_coll.update_one(filter={"name": "lisi"}, update={"$set": {"age": 20, "gender": "male"}}, upsert=True)
# 修改多条
update_res = user_coll.update_many(filter={"name": "lisi"}, update={"$set": {"age": 20, "gender": "male"}}, upsert=True)
- 查
client = pymongo.MongoClient(host='localhost', port=27017)
book_coll = client.test.books
- and
# 书籍类型为technology,并且作者名字为 xx009 的书籍
res = book_coll.find({"type": "technology", "author.name": "xx009"})
res = book_coll.find({"$and": [{"type": "technology"}, {"author.name": "xx009"}]})
两种方式是等价的
- or
res = book_coll.find({"$or": [{"type": "technology"}, {"type": "travel"}]})
- and or
# 作者xx005或作者xx007的旅游类书籍
res = book_coll.find({"type": "travel", "$or": [{"author.name": "xx005"}, {"author.name": "xx007"}] })
- 运算符
运算符    含义
$gt       >
$gte      >=
$lt       <
$lte      <=
$eq       ==
$ne       <>
$in       in
$nin      not in
$not      not
$exists   是否存在
book_coll.find({"author.age": {"$lte": 30}}) # 作者年龄小于等于30岁的
book_coll.find({"type": {"$nin": ["travel", "literature", "technology"]}}) # 查询不在这几种类型里的book
book_coll.find({"type": {"$exists": True}}) # 查询有type字段的数据
- 复杂查询
- 匹配数组字段中的单个元素
res = book_coll.find({"tag": "mongodb"}) # 只要数组中有该元素,就输出
- 匹配数组字段中的第N个元素
res = book_coll.find({"tag.0": "mongodb"}) # tag中第一个元素为mongodb的book
- 数组字段中至少有一个元素满足要求即可
res = book_coll.find({"score": {"$elemMatch": {"$gt": 15, "$lt": 20}}}) # 各平台得分中至少有一个元素同时满足大于15并且小于20
- 正则筛选
res = book_coll.find({"type": {"$regex": "chno"}}) # type字段包含'chno'的数据
res = book_coll.find({"type": {"$regex": "^tech"}}) # type字段以'tech'开头的数据
res = book_coll.find({"type": {"$regex": "logy$"}}) # type字段以'logy'结尾的数据
- 投影
res = book_coll.find({}, {"title": 1, "type": 1, "tag": 1, "author.name": 1}) # 只显示这几列
- 聚合操作
# 每种类别的作者年龄平均值
pipelines = [
    {"$group": {"_id": "$type", "avg_age": {"$avg": "$author.age"}}}
]
res = book_coll.aggregate(pipelines)
output:
{'_id': 'novel', 'avg_age': 25.615384615384617}
{'_id': 'technology', 'avg_age': 25.9}
{'_id': 'literature', 'avg_age': 25.625}
{'_id': 'travel', 'avg_age': 30.333333333333332}
{'_id': 'sociality', 'avg_age': 25.7}
# 每种类别的书的受欢迎程度总和
pipelines = [
    {"$group": {"_id": "$type", "sum_pop": {"$sum": "$favCount"}}}
]
res = book_coll.aggregate(pipelines)
output:
{'_id': 'novel', 'sum_pop': 603.0}
{'_id': 'technology', 'sum_pop': 594.0}
{'_id': 'literature', 'sum_pop': 329.0}
{'_id': 'travel', 'sum_pop': 495.0}
{'_id': 'sociality', 'sum_pop': 454.0}
# 每种类型、每种标签组合(以整个tag数组作为分组键)的book数量
pipelines = [
    {"$group": {"_id": {"outType": "$type", "outTag": "$tag"}, "bookCount": {"$sum": 1}}}
]
res = book_coll.aggregate(pipelines)
output:
{'_id': {'outType': 'sociality', 'outTag': ['developer', 'popular']}, 'bookCount': 1}
{'_id': {'outType': 'travel', 'outTag': ['document', 'mongodb']}, 'bookCount': 1}
{'_id': {'outType': 'travel', 'outTag': ['mongodb', 'popular']}, 'bookCount': 1}
{'_id': {'outType': 'travel', 'outTag': ['document', 'developer']}, 'bookCount': 1}
{'_id': {'outType': 'sociality', 'outTag': ['document', 'popular']}, 'bookCount': 1}
{'_id': {'outType': 'technology', 'outTag': ['developer', 'developer']}, 'bookCount': 1}
{'_id': {'outType': 'novel', 'outTag': ['popular', 'nosql']}, 'bookCount': 2}
{'_id': {'outType': 'technology', 'outTag': ['mongodb', 'developer']}, 'bookCount': 1}
{'_id': {'outType': 'literature', 'outTag': ['mongodb', 'document']}, 'bookCount': 1}
{'_id': {'outType': 'novel', 'outTag': ['popular', 'document']}, 'bookCount': 2}
# 查询每个作者的book的平均受欢迎程度,并按照降序排序
pipelines = [
    {"$group": {"_id": "$author.name", "avg_pop": {"$avg": "$favCount"}}},
    {"$sort": {"avg_pop": -1}}
]
res = book_coll.aggregate(pipelines)
output:
{'_id': 'xx001', 'avg_pop': 71.66666666666667}
{'_id': 'xx004', 'avg_pop': 70.4}
{'_id': 'xx008', 'avg_pop': 60.857142857142854}
{'_id': 'xx000', 'avg_pop': 58.0}
{'_id': 'xx002', 'avg_pop': 48.0}
{'_id': 'xx007', 'avg_pop': 44.5}
# 按book类型分组,统计每种类型的平均受欢迎程度、作者平均年龄以及book数量;然后只输出指定字段(_id、avg_pop),并按平均受欢迎程度降序排序。
pipelines = [
    {"$group": {
        "_id": "$type",
        "avg_pop": {"$avg": "$favCount"},
        "avg_age": {"$avg": "$author.age"},
        "sum_book": {"$sum": 1}
    }},
    {"$project": {"_id": 1, "avg_pop": 1}},
    {"$sort": {"avg_pop": -1}}
]
res = book_coll.aggregate(pipelines)
output:
{'_id': 'technology', 'avg_pop': 59.4}
{'_id': 'travel', 'avg_pop': 55.0}
{'_id': 'novel', 'avg_pop': 46.38461538461539}
{'_id': 'sociality', 'avg_pop': 45.4}
{'_id': 'literature', 'avg_pop': 41.125}
# 不同tag的book,按平均受欢迎程度降序、book数量升序排序(OrderedDict 来自标准库 collections)
pipelines = [
    {"$unwind": "$tag"}, # 数组里的N个元素拆分成N行
    {"$group": {"_id": "$tag", "avg_pop": {"$avg": "$favCount"}, "book_count": {"$sum": 1}}},
    {"$sort": OrderedDict(**{"avg_pop": -1, "book_count": 1})}
]
res = book_coll.aggregate(pipelines)
output:
{'_id': 'nosql', 'avg_pop': 58.476190476190474, 'book_count': 21}
{'_id': 'mongodb', 'avg_pop': 51.5, 'book_count': 20}
{'_id': 'document', 'avg_pop': 49.142857142857146, 'book_count': 21}
{'_id': 'popular', 'avg_pop': 47.18181818181818, 'book_count': 22}
{'_id': 'developer', 'avg_pop': 38.875, 'book_count': 16}