http://api.mongodb.com/python/current/
集合是一组文档,MongoDB中的文档类似于关系型数据库中的行,集合如同关系型数据库中的表。
集合是无模式的,一个集合里面的文档可以是各式各样的。同一个集合中的文档,不光值的类型可以不同,键也可以完全不一样,集合里可以放置任何文档。
连接数据库
import pickle
import dataset
import datetime
import numpy as np
import pandas as pd
from multiprocessing.dummy import Pool as ThreadPool
from pymongo import MongoClient
import pymongo
# Connection options shared by the MongoClient created below.
common_settings = {
    "serverSelectionTimeoutMS": 1000 * 1000,  # 1000 s
    "connectTimeoutMS": 10 * 1000,  # 10 s
    "socketTimeoutMS": 1000 * 1000,  # 1000 s
    "maxPoolSize": 20,
    # NOTE(review): waitQueueMultiple was removed in PyMongo 4.0; drop this
    # key when upgrading, otherwise client construction is expected to fail.
    "waitQueueMultiple": 10,
}

# Credentials are handed to the client instead of calling
# Database.authenticate(), which is deprecated since PyMongo 3.5 and removed
# in PyMongo 4.0.
client = MongoClient(
    'dds-2ze6b4728dd8a1541.mongodb.rds.aliyuncs.com:3717',
    username='User Name',  # user name
    password='Password',  # password
    # Authenticate against the same database the former
    # db.authenticate() call was issued on.
    authSource='Database',
    readPreference='secondaryPreferred',
    **common_settings
)  # replicaSet='mgset-4316217',
# replicaSet + readPreference are the options that make the replica set
# take effect.
db = client.get_database('Database')  # MongoDB database name
print(db.name)
collection = db['collection name']  # name of a collection in that database
拉取指定数据库集合内的数据
import json
def get_info_from_mongo(customer_id, coll, one_fun):
    """Query MongoDB once per customer and post-process the matching documents.

    Args:
        customer_id: pandas DataFrame with a ``cid`` column of customer ids.
            Each value is converted with ``int()`` before querying, so the
            ``customerId`` field stored in MongoDB must be an integer for
            the equality match to succeed.
        coll: MongoDB collection to query; only its ``find(filter,
            projection)`` method is used.
        one_fun: Callable applied to every document returned by ``find``.

    Returns:
        dict mapping each ``int`` customer id to the list of ``one_fun``
        results for that customer's documents, in the order ``find``
        returned them. Customers without documents map to an empty list.
    """
    # Projection: only these fields are requested from MongoDB.
    ret_filter = {'familyContacts': 1, 'companyName': 1}

    result = {}
    # Iterate the column values directly. The previous code fed index
    # *labels* into the positional ``.iloc``, which breaks on any DataFrame
    # whose index is not the default 0..n-1 range (e.g. after filtering).
    for raw_cid in customer_id['cid']:
        cid = int(raw_cid)
        op_condition = {'customerId': {'$eq': cid}}  # $eq: equality match
        result[cid] = [one_fun(doc) for doc in coll.find(op_condition, ret_filter)]
    return result
#test: run the lookup on the `cid[:1]` slice only as a quick check
# NOTE(review): `cid` and `one_fun` are not defined in the visible code --
# presumably a DataFrame with a `cid` column and a per-document callback; confirm.
get_info_from_mongo(cid[:1],collection,one_fun)
向指定数据库集合内插入数据
# For every (a, b) customer pair, compute the six match features and write
# them under ``relation_feature`` of the pair's document.
# NOTE(review): `coll`, `pairs` and `get_six_character` are not defined in the
# visible code (the connection snippet names its collection `collection`);
# confirm they are bound before this loop runs.
for i in range(len(pairs)):
    cida = pairs.iloc[i, 0]
    cidb = pairs.iloc[i, 1]
    print(cida, cidb)

    # Compute the features once per pair; the previous code called
    # get_six_character() six times with identical arguments.
    features = get_six_character(pairs, i)
    b_name_match_a = features['kg_b_name_match_a']
    b_phone_match_a = features['kg_b_phone_match_a']
    b_relationship_match_a = features['kg_b_relationship_match_a']
    a_name_match_b = features['kg_a_name_match_b']
    a_phone_match_b = features['kg_a_phone_match_b']
    a_relationship_match_b = features['kg_a_relationship_match_b']

    # $set with dotted paths updates only the named sub-fields, so other keys
    # already stored under relation_feature are left untouched. upsert=True
    # creates the document when no pair matches the filter.
    coll.update_one(
        {"$and": [{'a_customer_id': int(cida)}, {'b_customer_id': int(cidb)}]},
        {'$set': {
            "relation_feature.kg_b_name_match_a": b_name_match_a,
            "relation_feature.kg_b_phone_match_a": b_phone_match_a,
            "relation_feature.kg_b_relationship_match_a": b_relationship_match_a,
            "relation_feature.kg_a_name_match_b": a_name_match_b,
            "relation_feature.kg_a_phone_match_b": a_phone_match_b,
            "relation_feature.kg_a_relationship_match_b": a_relationship_match_b,
        }},
        upsert=True)
{
"_id" : ObjectId("5b87c3c62bb890b992659017"),
"a_customer_id" : 236,
"b_customer_id" : 2560842,
"relation_feature" : {
"kg_address_match_cnt" : 0, ###使用update_one配合$set(带点号的字段路径)写入时,只更新指定字段,不会覆盖同一层原本已有的其他字段
"kg_b_name_match_a" : 0,
"kg_b_phone_match_a" : 0,
"kg_b_relationship_match_a" : [],
"kg_a_name_match_b" : 0,
"kg_a_phone_match_b" : 0,
"kg_a_relationship_match_b" : []
}
}