【python】python批量写入假数据至4大传统数据库

可以实现连续的id生成。重启后自动获取最大id,加一继续写入。

一、数据驱动版本参考:

mysql-connector                    2.2.9
psycopg2                           2.9.5
pymssql                            2.2.7
cx-Oracle                          8.3.0

二、数据库建表语句

-- mysql
drop table if exists student;
-- 学生表
CREATE TABLE student(
s_id bigint comment '主键',
s_name VARCHAR(20) NOT NULL DEFAULT '' comment '姓名',
s_age int DEFAULT 0 comment '年龄',
s_sex VARCHAR(10) DEFAULT 'unknow' comment '性别',
s_part  varchar(10) NOT NULL comment '分区字段',
create_time timestamp(3) NOT NULL comment '创建时间',
PRIMARY KEY(s_id)
);

-- postgres
drop table if exists ischema.student;
-- 学生表
CREATE TABLE ischema.student(
s_id bigint,
s_name VARCHAR(20) NOT NULL DEFAULT '',
s_age int,
s_sex VARCHAR(10) NOT NULL DEFAULT '',
s_part varchar(10) ,
ts timestamp(3),
PRIMARY KEY(s_id)
);

-- sqlserver
drop table if exists ischema.student;
-- 学生表
CREATE TABLE ischema.student(
s_id bigint,
s_name VARCHAR(20) NOT NULL DEFAULT '',
s_age int,
s_sex VARCHAR(10) NOT NULL DEFAULT '',
s_part VARCHAR(10) NOT NULL DEFAULT '',
ts datetime2(3),
PRIMARY KEY(s_id)
);

-- oracle
drop table CDCUSER.STUDENT;
-- 学生表
CREATE TABLE CDCUSER.STUDENT(
s_id number(10) constraint pk_s_id primary KEY ,
s_name VARCHAR2(20) NOT NULL ,
s_age number(3),
s_sex VARCHAR2(10) NOT NULL ,
s_part  VARCHAR2(10),
create_time TIMESTAMP(3)
);
COMMENT ON TABLE CDCUSER.student IS 'student';
comment on column student.s_id is 'ID';
comment on column student.s_name is '姓名';
comment on column student.s_sex is '性别';
comment on column student.s_part is '分区字段';
comment on column CDCUSER.student.ts is 'timestamp';

三、python 脚本:

import mysql.connector
import psycopg2
import pymssql
import cx_Oracle

from faker import Faker
import random
import time
fake = Faker(locale='zh_CN')   

# 此位置选择需要的数据库,并取消其注释
# databaseType="postgresql"
# databaseType="sqlserver"
# databaseType="mysql"
databaseType="oracle"

conn = None
insert_cursor= None
select_str=""
insert_str=""
if "postgresql"== databaseType:
  conn=psycopg2.connect(
      dbname="test", 
      user="postgres", 
      password="123456", 
      host="192.168.100.1", 
      port="5432"
  )
  insert_cursor = conn.cursor()
  select_str="select max(s_id) a_id from ischema.student"
  insert_str="insert into ischema.student values(%s, %s, %s, %s, %s, %s)"
elif "mysql"== databaseType:
  tbl_name="student"
  conn = mysql.connector.connect(
    host="192.168.100.1",
    user="root",
    passwd="123456",
    database="test"
  )
  insert_cursor = conn.cursor()
  select_str="select max(s_id) a_id from {tbl}".format(tbl=tbl_name)
  insert_str="insert into {tbl} values(%s, %s,%s, %s, %s, %s)".format(tbl=tbl_name)

elif "sqlserver"== databaseType:
  tbl_name="ischema.student"
  conn =pymssql.connect(host='192.168.100.1',
                          port="1433",
                          user="sa",
                          password="123456",
                          database="test",
                          charset="utf8"
    )
  insert_cursor = conn.cursor()
  select_str="select max(s_id) a_id from {tbl}".format(tbl=tbl_name)
  insert_str="insert into {tbl} values(%s ,%s, %s, %s, %s, %s)".format(tbl=tbl_name)

elif "oracle"==databaseType:
  tbl_name="CDCUSER.STUDENT"
  conn = cx_Oracle.connect('tester','123456','192.168.100.1:1521/CBS')
  insert_cursor = conn.cursor()
  select_str="select max(S_ID) a_id from {tbl}".format(tbl=tbl_name)
  insert_str = "INSERT INTO {tbl} VALUES (:1, :2, :3, :4 ,:5, to_timestamp(:6,'yyyy-MM-dd HH24:mi:ss'))".format(tbl=tbl_name)


select_cursor = conn.cursor()
select_cursor.execute(select_str)
s_id_max=0
for x in select_cursor:
  if x[0] is not None:
    s_id_max=x[0]+1
  else:
    s_id_max=0
print("s_id_max:",s_id_max)

sex_list=["male","female"]

def gen_data(batch_num,s_id_max):
  data_list=[]
  for index in range(0,batch_num):
      fake_date_str=fake.date_time().strftime('%Y/%m/%d')
      fake_date_time_str=fake.date_time().strftime('%Y-%m-%d %H:%M:%S')
      vs=(s_id_max,fake.name(),random.randint(1,100),sex_list[random.randint(1,2)%2],fake_date_str,fake_date_time_str)
      data_list.append(vs)
      s_id_max+=1
  return data_list

while True:
  
  batch_num=random.randint(1,6)
  data_list=gen_data(batch_num,s_id_max)
  insert_cursor.executemany(insert_str,data_list)    
  conn.commit()
  print("commit complete: ", data_list)

  ids=list(map(lambda ele: ele[0],data_list))
  ids.sort()
  ids.reverse()
  s_id_max=ids[0]+1
  time.sleep(5)
  
conn.close()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值