Python27下的oracle简单造数工具
使用python27和cx_oracle,oracle版本12c
初步设计思路
##配置文件
config.txt文件格式
#makeDataTotal,造数据总条数
#tableName,表1,表2,表3
#tableRelation,表1.字段名,表2.字段名,表3.字段名,表4.字段名
#preValue,表名.字段名,值1,值2,值3
#preValue,表名.字段名,值1,值2,值3
#preValue,表名.字段名,值1,值2,值3
#preValue,表名.字段名,值1,值2,值3
#specialIDCRAD,表名.字段名,表名.字段名, //IDCARD表示身份证
#specialShortDate,表名.字段名,表名.字段名 //ShortDate 短日期
#specialSysID,表名.字段名,表名.字段名 //系统16byte
tableRelation后是两个字段为一组的表之间相等关系
preValue是每个字段预设的可能取值
- 全动态设计 ;
只需扩展配置文本文件中表名即可对多个表操作,动态获取数据表字段个数并根据字段类型和取值范围进行随机赋值产生数据
建表
CREATE TABLE "SYSTEM"."TBORDER"
( "ORDER_ID" VARCHAR2(16 BYTE),
"ORDER_NUMBER12" NUMBER(12,0),
"ORDER_NO" VARCHAR2(20 BYTE),
"ORDER_DATESHORT" DATE,
"ORDER_DATELONG" DATE,
"ORDER_VARCHAR2_18" VARCHAR2(18 BYTE)
) ;
CREATE TABLE "SYSTEM"."TBCUSTOM"
( "CU_ID" VARCHAR2(16 BYTE),
"CU_NUMBER12" NUMBER(12,0),
"CU_LEVER" VARCHAR2(20 BYTE),
"CU_DATESHORT" DATE,
"CU_DATELONG" DATE,
"CU_VARCHAR2_18" VARCHAR2(18 BYTE)
) ;
CREATE TABLE "SYSTEM"."TBEMS"
( "EMS_ID" VARCHAR2(16 BYTE),
"EMS_NUMBER12" NUMBER(12,0),
"EMS_NO" VARCHAR2(20 BYTE),
"EMS_DATESHORT" DATE,
"EMS_DATELONG" DATE,
"EMS_VARCHAR2_18" VARCHAR2(18 BYTE)
) ;
样例配置文件
makeDataTotal,100000000
tableName,TBCUSTOM,TBORDER,TBEMS
tableRelation,TBCUSTOM.CU_ID,TBORDER.ORDER_ID,TBORDER.ORDER_NO,TBEMS.EMS_NO
preValue,TBEMS.EMS_VARCHAR2_18,经济快递,普通快递,特快专递
preValue,TBCUSTOM.CU_LEVER,一般会员,银牌会员,金牌会员
specialIDCRAD,TBCUSTOM.CU_VARCHAR2_18,TBORDER.ORDER_VARCHAR2_18
specialShortDate,TBORDER.ORDER_DATESHORT,TBEMS.EMS_DATESHORT
specialSysID,TBCUSTOM.CU_ID,TBORDER.ORDER_ID
代码
coding: utf-8
import cx_Oracle #引用模块cx_Oracle
import time
import random #随机值的模块
import logging
if __name__ == "__main__":
filename = 'config.txt' # txt文件和当前脚本在同一目录下,所以不用写具体路径
makeDataTotal=1L #需做数据总条数
tableRelation=[]
dicPreValue={} #预设值字典(key值加list)
tableSchema=[] #表结构
tableParagraphNum =[] #表字段数
specialIDCRAD=[]
specialShortDate=[]
ParagraphResult="" #字段生成结果
strRandom="abcdefghigklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789"
a1 = (1970, 1, 2, 0, 0, 0, 0, 0, 0) # 设置开始日期时间元组(1970-01-01 00:00:00)
a2 = (1990, 12, 31, 23, 59, 59, 0, 0, 0) # 设置结束日期时间元组(1990-12-31 23:59:59)
try:
with open(filename, 'r') as file_to_read:
while True:
lines = file_to_read.readline() # 整行读取数据
if not lines:
break
pass
# p_tmp, E_tmp = [float(i) for i in lines.split(',')]
p_tmp="".join(lines.split()).split(',') # 将整行数据分割处理
if (p_tmp[0] == 'makeDataTotal'):
makeDataTotal = long(p_tmp[1])
if (p_tmp[0]=='tableName'):
tableName=p_tmp[1:]
if (p_tmp[0] == 'tableRelation'):
tempTableRelation = p_tmp[1:]
for h in range(0,len(tempTableRelation)):
if h % 2 == 0:
tableRelation.append(tempTableRelation[h]),tableRelation.append(tempTableRelation[h+1])
tableRelation.append("0"),tableRelation.append("0") #记录是否生成过该字段及计算后结果
if (p_tmp[0] == 'preValue'):
preValue = p_tmp[1:]
for k in range(len(preValue)-1):
dicPreValue.setdefault(preValue[0], []).append(preValue[k+1])
if (p_tmp[0] == 'specialIDCRAD'):
specialIDCRAD = p_tmp[1:]
if (p_tmp[0] == 'specialShortDate'):
specialShortDate = p_tmp[1:]
if (p_tmp[0] == 'specialSysID'):
specialSysID = p_tmp[1:]
pass
# pos = np.array(pos) # 将数据从list类型转换为array类型。
pass
file_to_read.close()
# 构建语句提取表结构select COLUMN_NAME,DATA_TYPE,DATA_LENGTH from user_tab_cols where table_name='TBCUSTOM';
#查询某表字段数select max(column_id) from user_tab_columns where table_name=upper('TBEMS');
conn = cx_Oracle.connect('system/Test12345@localhost:1521/orcl.mall.com') # 连接数据库
c = conn.cursor() # 获取cursor
for i in tableName:
x = c.execute('select COLUMN_NAME,DATA_TYPE,DATA_LENGTH from user_tab_cols where table_name='+"'"+ i +"'" ) # 获取表结构
x.fetchone()
tableSchema.append(x.fetchvars[0])
tableSchema.append(x.fetchvars[1])
tableSchema.append(x.fetchvars[2])
x = c.execute('select max(column_id) from user_tab_columns where table_name=upper(' + "'" + i + "')") # 获取单表字段数
x.fetchone()
tableParagraphNum.append(x.fetchvars[0].values[0])
#判断并组织数据
#INSERT ALL INTO TBCUSTOM(CU_ID,CU_NUMBER12) VALUES ('1111','123.45') INTO TBORDER(ORDER_NO) VALUES ('ASDF') select 1 from dual
strSqlPre="INSERT ALL "
strSqlAll = ""
strSqlFront=""
strSqlMIddle =""
strSqlLast=""
strSqlFinish=""
for num in range(0,makeDataTotal): #生成数据总数
for tableNum in range(0,len(tableName)): #表个数
strSqlFront=" INTO " + tableName[tableNum] + "(" #sql插入语句前段
for i in range(0,tableParagraphNum[tableNum]):
if i<tableParagraphNum[tableNum]-1:
strSqlMIddle=strSqlMIddle + tableSchema[tableNum*3].values[i] + "," #sql插入语句表字段名称
# 如果不存在索引关系,计算
if (tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i] not in tableRelation):
#判断是否有预设值,判断是否是特殊字段(身份证),如果是Date类型判断是否ShortDate,判断字段类型和长度
if dicPreValue.has_key(tableName[tableNum] + "."+ tableSchema[tableNum*3].values[i]): #如果存在预设值,随机选择
j=random.randint(0,len(dicPreValue[tableName[tableNum] + "."+ tableSchema[tableNum*3].values[i]])-1)
ParagraphResult=dicPreValue[tableName[tableNum] + "."+ tableSchema[tableNum*3].values[i]][j]
strSqlLast=strSqlLast + ParagraphResult + "','"
elif (tableName[tableNum] + "."+ tableSchema[tableNum*3].values[i]) in specialIDCRAD: #身份证生成器
strSqlLast=strSqlLast + "220302198001020018" + "','"
elif (tableName[tableNum] + "."+ tableSchema[tableNum*3].values[i]) in specialShortDate: #短日期
start = time.mktime(a1) # 生成开始时间戳
end = time.mktime(a2) # 生成结束时间戳
t = random.randint(start, end) # 在开始和结束时间戳中随机取出一个
date_touple = time.localtime(t) # 将时间戳生成时间元组
date = time.strftime("%Y-%m-%d", date_touple) # 将时间元组转成格式化字符串
ParagraphResult="to_date( '" + date + "', 'YYYY-MM-DD')"
strSqlLast = strSqlLast[0:-1:1] + ParagraphResult + ",'"
#strSqlLast = strSqlLast[0:-1:1] + "to_date( '" + time.strftime("%Y-%m-%d",time.localtime()) + "', 'YYYY-MM-DD')" + ",'"
else:
#tableSchema[tableNum*3+1].values[i] #字段类型VARCHAR2 NUMBER DATE
# tableSchema[tableNum * 3 + 2].values[i] #字段长度
if tableSchema[tableNum*3+1].values[i]=="VARCHAR2":
ParagraphResult=''.join(random.sample(strRandom,random.randint(1,int(tableSchema[tableNum * 3 + 2].values[i]))))
strSqlLast=strSqlLast + ParagraphResult + "','"
elif tableSchema[tableNum*3+1].values[i]=="NUMBER":
ParagraphResult=str(random.uniform(0,100))
strSqlLast =strSqlLast + ParagraphResult + "','"
elif tableSchema[tableNum * 3 + 1].values[i] == "DATE":
#to_date ( '2007-12-20 18:31:34' , 'YYYY-MM-DD HH24:MI:SS' )
start = time.mktime(a1) # 生成开始时间戳
end = time.mktime(a2) # 生成结束时间戳
t = random.randint(start, end) # 在开始和结束时间戳中随机取出一个
date_touple = time.localtime(t) # 将时间戳生成时间元组
date = time.strftime("%Y-%m-%d %H:%M:%S", date_touple) # 将时间元组转成格式化字符串
ParagraphResult="to_date( '" + date + "', 'YYYY-MM-DD HH24:MI:SS' )"
strSqlLast = strSqlLast[0:-1:1] + ParagraphResult + ",'" # 格式化成2016-03-20 11:45:39形式
#strSqlLast =strSqlLast[0:-1:1] + "to_date( '"+ time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + "', 'YYYY-MM-DD HH24:MI:SS' )" + ",'" # 格式化成2016-03-20 11:45:39形式
#如果存在索引,未计算则计算后存入tableRelation
elif (tableRelation[tableRelation.index(tableName[tableNum]+"."+ tableSchema[tableNum*3].values[i])+1]=="0")or(tableRelation[tableRelation.index(tableName[tableNum]+"."+ tableSchema[tableNum*3].values[i])+2]=="0"):
# 判断是否有预设值,判断是否是特殊字段(身份证),如果是Date类型判断是否ShortDate,判断字段类型和长度
if dicPreValue.has_key(tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]): # 如果存在预设值,随机选择
j = random.randint(0, len(dicPreValue[tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]]) - 1)
ParagraphResult = dicPreValue[tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]][j]
strSqlLast = strSqlLast + ParagraphResult + "','"
elif (tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]) in specialIDCRAD: # 身份证生成器
strSqlLast = strSqlLast + "220302198001020018" + "','"
elif (tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]) in specialShortDate: # 短日期
start = time.mktime(a1) # 生成开始时间戳
end = time.mktime(a2) # 生成结束时间戳
t = random.randint(start, end) # 在开始和结束时间戳中随机取出一个
date_touple = time.localtime(t) # 将时间戳生成时间元组
date = time.strftime("%Y-%m-%d", date_touple) # 将时间元组转成格式化字符串
ParagraphResult = "to_date( '" + date + "', 'YYYY-MM-DD')"
strSqlLast = strSqlLast[0:-1:1] + ParagraphResult + ",'"
# strSqlLast = strSqlLast[0:-1:1] + "to_date( '" + time.strftime("%Y-%m-%d",time.localtime()) + "', 'YYYY-MM-DD')" + ",'"
else:
# tableSchema[tableNum*3+1].values[i] #字段类型VARCHAR2 NUMBER DATE
# tableSchema[tableNum * 3 + 2].values[i] #字段长度
if tableSchema[tableNum * 3 + 1].values[i] == "VARCHAR2":
ParagraphResult = ''.join(random.sample(strRandom, random.randint(1, int(tableSchema[tableNum * 3 + 2].values[i]))))
strSqlLast = strSqlLast + ParagraphResult + "','"
elif tableSchema[tableNum * 3 + 1].values[i] == "NUMBER":
ParagraphResult = str(random.uniform(0, 100))
strSqlLast = strSqlLast + ParagraphResult + "','"
elif tableSchema[tableNum * 3 + 1].values[i] == "DATE":
# to_date ( '2007-12-20 18:31:34' , 'YYYY-MM-DD HH24:MI:SS' )
start = time.mktime(a1) # 生成开始时间戳
end = time.mktime(a2) # 生成结束时间戳
t = random.randint(start, end) # 在开始和结束时间戳中随机取出一个
date_touple = time.localtime(t) # 将时间戳生成时间元组
date = time.strftime("%Y-%m-%d %H:%M:%S", date_touple) # 将时间元组转成格式化字符串
ParagraphResult = "to_date( '" + date + "', 'YYYY-MM-DD HH24:MI:SS' )"
strSqlLast = strSqlLast[0:-1:1] + ParagraphResult + ",'" # 格式化成2016-03-20 11:45:39形式
# strSqlLast =strSqlLast[0:-1:1] + "to_date( '"+ time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + "', 'YYYY-MM-DD HH24:MI:SS' )" + ",'" # 格式化成2016-03-20 11:45:39形式
if tableRelation.index(tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i])%2==0:
tableRelation[tableRelation.index(tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]) + 2] = "1" #计算后改变状态并存值
tableRelation[tableRelation.index(tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]) + 3] = ParagraphResult #存值
else:
tableRelation[tableRelation.index(tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]) + 1] = "1" # 计算后改变状态并存值
tableRelation[tableRelation.index(tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]) + 2] = ParagraphResult # 存值
# 已经计算过则直接查找赋值
elif tableRelation.index(tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]) % 2 == 0:
strSqlLast = strSqlLast + tableRelation[tableRelation.index(tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]) + 3] + "','"
else:
strSqlLast = strSqlLast + tableRelation[tableRelation.index(tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]) + 2] + "','"
else: #sql后半段value语句*******************************************************************
strSqlMIddle = strSqlMIddle + tableSchema[tableNum*3].values[i] + ") VALUES ('"
if (tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i] not in tableRelation):
# 判断是否有预设值,判断是否是特殊字段(身份证),如果是Date类型判断是否ShortDate,判断字段类型和长度
if dicPreValue.has_key(tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]): # 如果存在预设值,随机选择
j = random.randint(0, len(dicPreValue[tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]]) - 1)
ParagraphResult=dicPreValue[tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]][j]
strSqlLast = strSqlLast + ParagraphResult + "')"
elif (tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]) in specialIDCRAD: # 身份证生成器
strSqlLast = strSqlLast + "220302198001020018" + "')"
elif (tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]) in specialShortDate: # 短日期
start = time.mktime(a1) # 生成开始时间戳
end = time.mktime(a2) # 生成结束时间戳
t = random.randint(start, end) # 在开始和结束时间戳中随机取出一个
date_touple = time.localtime(t) # 将时间戳生成时间元组
date = time.strftime("%Y-%m-%d", date_touple) # 将时间元组转成格式化字符串
ParagraphResult="to_date( '" + date + "', 'YYYY-MM-DD')"
strSqlLast = strSqlLast[0:-1:1] + ParagraphResult + ")"
else:
# tableSchema[tableNum*3+1].values[i] #字段类型VARCHAR2 NUMBER DATE
# tableSchema[tableNum * 3 + 2].values[i] #字段长度
if tableSchema[tableNum * 3 + 1].values[i] == "VARCHAR2":
ParagraphResult=''.join(random.sample(strRandom,random.randint(1,int(tableSchema[tableNum * 3 + 2].values[i]))))
strSqlLast = strSqlLast + ParagraphResult + "')"
elif tableSchema[tableNum * 3 + 1].values[i] == "NUMBER":
ParagraphResult=str(random.uniform(0,100))
strSqlLast = strSqlLast + ParagraphResult + "')"
elif tableSchema[tableNum * 3 + 1].values[i] == "DATE":
start = time.mktime(a1) # 生成开始时间戳
end = time.mktime(a2) # 生成结束时间戳
t = random.randint(start, end) # 在开始和结束时间戳中随机取出一个
date_touple = time.localtime(t) # 将时间戳生成时间元组
date = time.strftime("%Y-%m-%d %H:%M:%S", date_touple) # 将时间元组转成格式化字符串
ParagraphResult="to_date( '" + date + "', 'YYYY-MM-DD HH24:MI:SS' )"
strSqlLast = strSqlLast[0:-1:1] + ParagraphResult + ")" # 格式化成2016-03-20 11:45:39形式
elif (tableRelation[tableRelation.index(tableName[tableNum]+"."+ tableSchema[tableNum*3].values[i])+1]=="0")or(tableRelation[tableRelation.index(tableName[tableNum]+"."+ tableSchema[tableNum*3].values[i])+2]=="0"):
# 判断是否有预设值,判断是否是特殊字段(身份证),如果是Date类型判断是否ShortDate,判断字段类型和长度
if dicPreValue.has_key(tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]): # 如果存在预设值,随机选择
j = random.randint(0, len(dicPreValue[tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]]) - 1)
ParagraphResult = dicPreValue[tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]][j]
strSqlLast = strSqlLast + ParagraphResult + "')"
elif (tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]) in specialIDCRAD: # 身份证生成器
strSqlLast = strSqlLast + "220302198001020018" + "')"
elif (tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]) in specialShortDate: # 短日期
start = time.mktime(a1) # 生成开始时间戳
end = time.mktime(a2) # 生成结束时间戳
t = random.randint(start, end) # 在开始和结束时间戳中随机取出一个
date_touple = time.localtime(t) # 将时间戳生成时间元组
date = time.strftime("%Y-%m-%d", date_touple) # 将时间元组转成格式化字符串
ParagraphResult = "to_date( '" + date + "', 'YYYY-MM-DD')"
strSqlLast = strSqlLast[0:-1:1] + ParagraphResult + ")"
else:
# tableSchema[tableNum*3+1].values[i] #字段类型VARCHAR2 NUMBER DATE
# tableSchema[tableNum * 3 + 2].values[i] #字段长度
if tableSchema[tableNum * 3 + 1].values[i] == "VARCHAR2":
ParagraphResult = ''.join(random.sample(strRandom, random.randint(1, int(
tableSchema[tableNum * 3 + 2].values[i]))))
strSqlLast = strSqlLast + ParagraphResult + "')"
elif tableSchema[tableNum * 3 + 1].values[i] == "NUMBER":
ParagraphResult = str(random.uniform(0, 100))
strSqlLast = strSqlLast + ParagraphResult + "')"
elif tableSchema[tableNum * 3 + 1].values[i] == "DATE":
start = time.mktime(a1) # 生成开始时间戳
end = time.mktime(a2) # 生成结束时间戳
t = random.randint(start, end) # 在开始和结束时间戳中随机取出一个
date_touple = time.localtime(t) # 将时间戳生成时间元组
date = time.strftime("%Y-%m-%d %H:%M:%S", date_touple) # 将时间元组转成格式化字符串
ParagraphResult = "to_date( '" + date + "', 'YYYY-MM-DD HH24:MI:SS' )"
strSqlLast = strSqlLast[0:-1:1] + ParagraphResult + ")" # 格式化成2016-03-20 11:45:39形式
if tableRelation.index(tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]) % 2 == 0:
tableRelation[tableRelation.index(tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]) + 2] = "1" # 计算后改变状态并存值
tableRelation[tableRelation.index(tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]) + 3] = ParagraphResult # 存值
else:
tableRelation[tableRelation.index(tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]) + 1] = "1" # 计算后改变状态并存值
tableRelation[tableRelation.index(tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]) + 2] = ParagraphResult # 存值
elif tableRelation.index(tableName[tableNum] + "." + tableName[tableNum * 3].values[i]) % 2 == 0:
strSqlLast = strSqlLast[0:-1:1] + "'"+ tableRelation[tableRelation.index(tableName[tableNum] + "." + tableSchema[tableNum * 3].values[i]) + 2] + "')"
else:
strSqlLast = strSqlLast[0:-1:1] + "'"+ tableRelation[tableRelation.index(tableName[tableNum] + "." + tableSchema[tableNum * 2].values[i]) + 2] + "')"
strSqlAll = strSqlFront + strSqlMIddle + strSqlLast
strSqlPre=strSqlPre+strSqlAll
strSqlFront = ""
strSqlMIddle = ""
strSqlLast = ""
strSqlFinish=strSqlPre + " select 1 from dual"
# 判断是否表字段间有Relation
x = c.execute(strSqlFinish) # 插入数据
conn.commit();
c.close() # 关闭cursor
conn.close() # 关闭连接
except Exception:
print "Error "