通过 python 生成随机数据,并批量插入到 Amazon DocumentDB (或mongodb) 中

通过 python 生成随机数据,并批量插入到 Amazon DocumentDB (或mongodb) 中。

使用 Python 标准库的 random 模块生成随机数据,例如:
随机整数 (0 - 999999)

# Named random_id rather than `id` so the builtin id() is not shadowed.
# randint() is inclusive on both ends: 0 <= random_id <= 999999.
random_id = random.randint(0, 999999)

随机选择一个 item

# Pool of candidate values; random.choice() returns one element of it.
enum_city = [
    'Beijing', 'Shanghai', 'Guangzhou',
    'Shenzhen', 'Hangzhou', 'Wuhan',
]
city = random.choice(enum_city)

随机字符串

import random
import string

# Named `chars` rather than `str` so the builtin str type is not shadowed.
# random.sample() draws 16 distinct characters (no repeats) from the
# 62-character alphabet of ASCII letters and digits.
chars = random.sample(string.ascii_letters + string.digits, 16)
print(''.join(chars))

生成想要的数据格式(json)

    # Value pools for the randomly chosen fields.
    enum_bool = ['true', 'false']
    enum_sexy = ['male', 'female']
    enum_city = ['Beijing','Shanghai','Guangzhou','Shenzhen','Hangzhou','Wuhan']
    enum_device = ['IOS','Android']
    # One random integer drives both the mobile number and the e-mail address.
    random_id = random.randint(0,99999999)
    mobile = '138%s' % random_id
    email = '%s@csdn.com' % random_id
    smsConsent = random.choice(enum_bool)
    emailConsent = random.choice(enum_bool)
    sexual = random.choice(enum_sexy)
    city = random.choice(enum_city)
    device = random.choice(enum_device)
    # JSON template; the seven %s placeholders are filled in the order of the
    # tuple that follows the closing quotes.
    insertdata = '''
    {
        "journeyId" : 1,
        "mobile": "%s",
        "email": "%s",
        "smsConsent": "%s",
        "emailConsent": "%s",
        "nextStepId": 1,
        "traits": [
          {"tag": "sexual", "value": "%s"},
          {"tag": "city", "value": "%s" },
          {"tag": "device", "value": "%s"}
        ]
    }
    ''' % (mobile, email, smsConsent, emailConsent, sexual, city, device)

连接 DocumentDB,批量插入数据

import pymongo

# The URI is split into adjacent string literals (joined by Python at compile
# time) so that no literal has to span a physical line break.
myclient = pymongo.MongoClient(
    'mongodb://dbadmin:XXX@docdb.XXXXX.docdb.cn-north-1.amazonaws.com.cn:27017/'
    '?tls=true&tlsCAFile=rds-combined-ca-cn-bundle.pem&replicaSet=rs0'
    '&readPreference=secondaryPreferred&retryWrites=false'
)
# Every element passed to insert_many() must be a document (dict); append
# further documents to this list as needed.
data = [{"item1":"1"},{"item2":"2"}]
db = myclient["dbname"]
col = db.col_test01
col.insert_many(data)
并行执行
from multiprocessing import Pool

# The guard keeps worker processes from re-running this section when the
# module is imported by them (required with the "spawn" start method).
if __name__ == '__main__':
    p = Pool()
    # Pool.apply() blocks until the call has finished, which would run the
    # five jobs one after another; apply_async() submits them all at once.
    jobs = [p.apply_async(func=insert_data, args=()) for _ in range(5)]
    p.close()
    p.join()
    for job in jobs:
        job.get()  # re-raises any exception that occurred in the worker

把以上连起来的最终代码

import pymongo
import sys
from multiprocessing import Pool
import random
import json


def insert_data():
    """Bulk-load randomly generated user documents into ``dbname.col_test01``.

    Opens one client connection, then issues 1,000 ``insert_many`` calls of
    1,000 documents each.  Every document has the fixed fields ``Id`` and
    ``nextId`` (both ``1``), a ``mobile`` and an ``email`` derived from the
    same random integer, two consent flags stored as the strings ``'true'``
    or ``'false'``, and a ``traits`` list carrying random ``sexual``,
    ``city`` and ``device`` tags.

    Each call creates (and closes) its own client, so the function can be
    submitted to several worker processes independently.
    """
    # Value pools for the random fields; constant, so built once per call.
    enum_bool = ('true', 'false')
    enum_gender = ('male', 'female')
    enum_city = ('Beijing', 'Shanghai', 'Guangzhou', 'Shenzhen', 'Hangzhou', 'Wuhan')
    enum_device = ('IOS', 'Android')

    # Adjacent literals are concatenated by Python, which keeps the URI in one
    # piece without a string literal spanning a physical line break.
    uri = (
        'mongodb://dbadmin:XXX@docdb.XXXXX.docdb.cn-north-1.amazonaws.com.cn:27017/'
        '?tls=true&tlsCAFile=rds-combined-ca-cn-bundle.pem&replicaSet=rs0'
        '&readPreference=secondaryPreferred&retryWrites=false'
    )

    # The context manager closes the client even if an insert fails.
    with pymongo.MongoClient(uri) as myclient:
        col = myclient["dbname"].col_test01
        for _ in range(1000):
            data = []
            for _ in range(1000):
                random_id = random.randint(0, 99999999)
                # NOTE(review): random_id is not zero-padded, so mobile can be
                # shorter than 11 digits; left as-is to keep the data shape.
                mobile = '138%s' % random_id
                email = '%s@csdn.com' % random_id
                smsConsent = random.choice(enum_bool)
                emailConsent = random.choice(enum_bool)
                sexual = random.choice(enum_gender)
                city = random.choice(enum_city)
                device = random.choice(enum_device)
                # Build the document directly as a dict instead of formatting
                # a JSON string and parsing it back.
                data.append({
                    "Id": 1,
                    "mobile": mobile,
                    "email": email,
                    "smsConsent": smsConsent,
                    "emailConsent": emailConsent,
                    "nextId": 1,
                    "traits": [
                        {"tag": "sexual", "value": sexual},
                        {"tag": "city", "value": city},
                        {"tag": "device", "value": device},
                    ],
                })
            col.insert_many(data)


if __name__ == '__main__':
    # Run five insert_data() jobs on a process pool.
    p = Pool()
    # Pool.apply() blocks until the call has finished, which would run the
    # five jobs one after another; apply_async() submits them all at once so
    # they execute in parallel across the pool's workers.
    jobs = [p.apply_async(func=insert_data, args=()) for _ in range(5)]
    p.close()
    p.join()
    for job in jobs:
        job.get()  # re-raises any exception that occurred in the worker
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值