"""Generate random CSV test data for a simple edge/vertex graph.

Each edge and each vertex is rendered as one comma-separated line whose
attribute columns are produced from a schema (an ordered list of named,
typed columns). Running the module as a script builds a graph of random
edges and dumps ``edge_<name>.csv`` and ``vert_<name>.csv``.
"""

import os
import json
import random
import string
import time
import datetime
from enum import Enum, unique

try:
    import radar
except ImportError:
    # radar is only used to draw random datetimes; fall back to the
    # standard library when it is not installed.
    radar = None

# Probability that a generated cell is emitted as the empty (null) value.
NULLRATE = 0.05
# Bounds used when drawing random dates and timestamps.
STARTTIME = datetime.datetime(year=2000, month=1, day=10)
ENDTIME = datetime.datetime(year=2018, month=12, day=12)


@unique
class SchemaType(Enum):
    """Column types understood by the data generators."""

    String = 0
    Int = 1
    BigInt = 2
    SmallInt = 3
    Float = 4
    Double = 5
    Date = 6
    Timestamp = 7
    Null = 8


class commonSchema:
    """A single named, typed column of a schema."""

    def __init__(self, name, rtype):
        self.name = name
        self.type = rtype

    def toJson(self):
        """Return the column as a ``{"name": ..., "type": ...}`` dict.

        The ``type`` value is whatever was passed to the constructor; it
        is not converted to a string here.
        """
        return {"name": self.name, "type": self.type}

    def getName(self):
        """Return the column name."""
        return self.name

    def getType(self):
        """Return the column type."""
        return self.type


class schemaTerms:
    """An ordered collection of ``commonSchema`` columns."""

    def __init__(self):
        self.schemas = []

    def addSchema(self, schema):
        """Append *schema* as the next column."""
        self.schemas.append(schema)

    def toJson(self):
        """Return a list with the ``toJson()`` result of every column."""
        return [column.toJson() for column in self.schemas]


def _build_schema(columns):
    """Return a ``schemaTerms`` holding one column per (name, type) pair."""
    terms = schemaTerms()
    for column_name, column_type in columns:
        terms.addSchema(commonSchema(column_name, column_type))
    return terms


def genSchema1():
    """Return the five-column schema: id, name, age, score, scorefloat."""
    return _build_schema([
        ("id", SchemaType.Int),
        ("name", SchemaType.String),
        ("age", SchemaType.Int),
        ("score", SchemaType.Double),
        ("scorefloat", SchemaType.Float),
    ])


def getSchema2():
    """Return the columns of ``genSchema1`` plus a date and a timestamp."""
    return _build_schema([
        ("id", SchemaType.Int),
        ("name", SchemaType.String),
        ("age", SchemaType.Int),
        ("score", SchemaType.Double),
        ("scorefloat", SchemaType.Float),
        ("tdate", SchemaType.Date),
        ("ttimestmap", SchemaType.Timestamp),
    ])


def getString():
    """Return 8 distinct random characters drawn from ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    # random.sample draws without replacement, so no character repeats.
    return "".join(random.sample(alphabet, 8))


def getSmallInt():
    """Return a random integer in [1, 1000] as a string."""
    return str(random.randint(1, 1000))


def getBigInt():
    """Return a random integer in [1, 1000000] as a string."""
    return str(random.randint(1, 1000000))


def getInt():
    """Return a random integer in [1, 1000000] as a string."""
    return str(random.randint(1, 1000000))


def getFloat():
    """Return a random float in [0, 1000000) as a string."""
    return str(random.random() * 1000000)


def getDouble():
    """Return a random float in [0, 1000000) as a string."""
    return str(random.random() * 1000000)


def getZfill2(item):
    """Return ``str(item)`` left-padded with zeros to at least two characters."""
    return str(item).zfill(2)


def _random_datetime():
    """Return a random datetime between STARTTIME and ENDTIME."""
    if radar is not None:
        return radar.random_datetime(STARTTIME, ENDTIME)
    span_seconds = int((ENDTIME - STARTTIME).total_seconds())
    return STARTTIME + datetime.timedelta(seconds=random.randrange(span_seconds))


def getDate():
    """Return a random date formatted as ``YYYY-MM-DD``."""
    moment = _random_datetime()
    return "{0}-{1}-{2}".format(
        moment.year, getZfill2(moment.month), getZfill2(moment.day)
    )


def getTimestamp():
    """Return a random timestamp formatted as ``YYYY-MM-DD HH:MM:SS``."""
    moment = _random_datetime()
    return "{0}-{1}-{2} {3}:{4}:{5}".format(
        moment.year,
        getZfill2(moment.month),
        getZfill2(moment.day),
        getZfill2(moment.hour),
        getZfill2(moment.minute),
        getZfill2(moment.second),
    )


def getNull():
    """Return the empty string, which represents a null cell."""
    return ""


# Dispatch table from column type to the function that generates a value.
# Built once at import time instead of on every call.
_GENERATORS = {
    SchemaType.String: getString,
    SchemaType.Int: getInt,
    SchemaType.Float: getFloat,
    SchemaType.Double: getDouble,
    # SmallInt uses its own generator so values stay within [1, 1000].
    SchemaType.SmallInt: getSmallInt,
    SchemaType.BigInt: getBigInt,
    SchemaType.Date: getDate,
    SchemaType.Timestamp: getTimestamp,
    SchemaType.Null: getNull,
}


def genDataFromType(rtype):
    """Return one random cell value (a string) for column type *rtype*.

    With probability ``NULLRATE`` the null value is returned regardless
    of the type.

    Raises:
        KeyError: if *rtype* has no registered generator (only when the
            null value was not chosen).
    """
    # The null draw happens first, before the type is looked up.
    if random.random() < NULLRATE:
        return getNull()
    return _GENERATORS[rtype]()


def genDataFromSchmeTerm(tschemaTerms):
    """Return one comma-separated line of random data for *tschemaTerms*.

    One value is generated per column, in column order.
    """
    return ",".join(
        genDataFromType(column.getType()) for column in tschemaTerms.schemas
    )


class edge:
    """A directed edge: start vertex key, end vertex key and extra fields."""

    def __init__(self, startVert, endVert, *args):
        self.startVert = startVert
        self.endVert = endVert
        self.data = list(args)
        # NOTE(review): this prints on every construction and looks like
        # leftover debugging; kept so the program's output is unchanged.
        print("data", self.data)

    def __str__(self):
        return ",".join([self.startVert, self.endVert] + self.data)

    def toString(self):
        """Return the comma-separated line for this edge."""
        return self.__str__()


class vert:
    """A vertex identified by its primary key, with extra attribute fields.

    Equality and hashing use the primary key only, so two vertices with
    the same key and different attributes are the same set member.
    """

    def __init__(self, primaryKey, *args):
        self.primaryKey = primaryKey
        self.attr = list(args)

    def __str__(self):
        return ",".join([self.primaryKey] + self.attr)

    def toString(self):
        """Return the comma-separated line for this vertex."""
        return self.__str__()

    def __eq__(self, other):
        if not isinstance(other, vert):
            # Let Python fall back to its default comparison instead of
            # failing on a missing ``primaryKey`` attribute.
            return NotImplemented
        return self.primaryKey == other.primaryKey

    def __hash__(self):
        return hash(self.primaryKey)


class graph:
    """A named list of edges plus the set of vertices they touch."""

    def __init__(self, name, schemas):
        self.name = name
        self.edges = []
        self.verts = set()
        # Schema used to generate attribute data for every vertex.
        self.schemas = schemas

    def add_edge(self, redge):
        """Record *redge* and register both of its endpoints as vertices.

        Fresh attribute data is generated for each endpoint; if a vertex
        with the same key is already present, the set keeps the existing
        one and the new data is discarded.
        """
        self.edges.append(redge)
        for key in (redge.startVert, redge.endVert):
            self.verts.add(vert(key, genDataFromSchmeTerm(self.schemas)))

    def show(self, limit=4):
        """Print at most *limit* edges, in insertion order."""
        # max() keeps a negative limit printing nothing rather than
        # slicing from the end of the list.
        for item in self.edges[:max(limit, 0)]:
            print(item)

    def dump_file(self, fpath):
        """Write ``edge_<name>.csv`` and ``vert_<name>.csv`` into *fpath*.

        One line is written per edge and per vertex. The directory is not
        created here, so ``open`` fails if it does not exist.
        """
        edge_path = os.path.join(fpath, "edge_{0}.csv".format(self.name))
        with open(edge_path, "w", encoding="utf-8") as handle:
            handle.writelines(item.toString() + "\n" for item in self.edges)

        vert_path = os.path.join(fpath, "vert_{0}.csv".format(self.name))
        with open(vert_path, "w", encoding="utf-8") as handle:
            handle.writelines(item.toString() + "\n" for item in self.verts)


def main(output_dir="/Users/lixiaomeng/Desktop/huigui/graphdata", edge_count=10000):
    """Build a random graph named ``table1`` and dump it to *output_dir*.

    Start vertices are drawn from 0..200 and end vertices from 201..400.
    Edge data follows ``genSchema1``; vertex data follows ``getSchema2``.
    """
    temptable = graph("table1", getSchema2())
    targetSchema = genSchema1()
    print("targetSchema", targetSchema.toJson())
    for _ in range(edge_count):
        startVert = str(random.randint(0, 200))
        endVert = str(random.randint(201, 400))
        temptable.add_edge(
            edge(startVert, endVert, genDataFromSchmeTerm(targetSchema))
        )
    temptable.dump_file(output_dir)


if __name__ == "__main__":
    main()