图数据生成脚本

 

本脚本依赖第三方 radar 包,用于生成随机日期时间。

图数据生成脚本 折叠源码

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

import os

import json

import random

import string

import radar

import time

import datetime

from enum import Enum,unique

 

 

# Probability with which genDataFromType emits an empty (null) field instead
# of a generated value.
NULLRATE = 0.05

# Lower and upper bounds handed to radar.random_datetime when generating
# Date and Timestamp fields.
STARTTIME = datetime.datetime(2000, 1, 10)
ENDTIME = datetime.datetime(2018, 12, 12)

@unique
class SchemaType(Enum):
    """Type tags a schema column can carry.

    ``@unique`` guarantees that no two members share a value.
    """

    # Text
    String = 0

    # Integer families
    Int = 1
    BigInt = 2
    SmallInt = 3

    # Floating point families
    Float = 4
    Double = 5

    # Temporal
    Date = 6
    Timestamp = 7

    # Always-empty column
    Null = 8

 

class commonSchema:
    """One named, typed column of a schema.

    ``name`` is the column name and ``type`` is the type tag supplied by the
    caller (a ``SchemaType`` member everywhere in this file).
    """

    def __init__(self, name, rtype):
        """Store the column name and its type tag."""
        self.name = name
        self.type = rtype

    def toJson(self):
        """Return a JSON-serializable ``{"name": ..., "type": ...}`` dict.

        Enum type tags are emitted by member name because ``json.dumps``
        cannot encode Enum members; any other tag is passed through as-is.
        """
        type_tag = self.type.name if isinstance(self.type, Enum) else self.type
        return {
            "name": self.name,
            "type": type_tag,
        }

    def getName(self):
        """Return the column name."""
        return self.name

    def getType(self):
        """Return the type tag exactly as it was passed to the constructor."""
        return self.type

class schemaTerms:
    """Ordered collection of schema entries (objects exposing ``toJson``)."""

    def __init__(self):
        """Start with an empty entry list."""
        self.schemas = list()

    def addSchema(self, schema):
        """Append one schema entry, preserving insertion order."""
        self.schemas += [schema]

    def toJson(self):
        """Return the ``toJson()`` result of every entry, in insertion order."""
        rendered = []
        for entry in self.schemas:
            rendered.append(entry.toJson())
        return rendered

 

def genSchema1():
    """Build the five-column schema: id, name, age, score, scorefloat.

    Returns:
        A ``schemaTerms`` holding one ``commonSchema`` per column, in the
        order listed below.
    """
    columns = (
        ("id", SchemaType.Int),
        ("name", SchemaType.String),
        ("age", SchemaType.Int),
        ("score", SchemaType.Double),
        ("scorefloat", SchemaType.Float),
    )
    terms = schemaTerms()
    for columnName, columnType in columns:
        terms.addSchema(commonSchema(columnName, columnType))
    return terms

 

def getSchema2():
    """Build the seven-column schema: the five basic columns plus a date and a timestamp.

    Returns:
        A ``schemaTerms`` holding one ``commonSchema`` per column, in the
        order listed below.
    """
    columns = (
        ("id", SchemaType.Int),
        ("name", SchemaType.String),
        ("age", SchemaType.Int),
        ("score", SchemaType.Double),
        ("scorefloat", SchemaType.Float),
        ("tdate", SchemaType.Date),
        # NOTE(review): "ttimestmap" looks like a misspelling of "ttimestamp";
        # kept verbatim because it is the emitted column name.
        ("ttimestmap", SchemaType.Timestamp),
    )
    terms = schemaTerms()
    for columnName, columnType in columns:
        terms.addSchema(commonSchema(columnName, columnType))
    return terms

 

 

def getString():
    """Return 8 random characters drawn without repetition from ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    picked = random.sample(alphabet, 8)
    return "".join(picked)

 

def getSmallInt():
    """Return a random integer in [1, 1000], rendered as a decimal string.

    ``random.randint`` needs both bounds; the previous single-argument call
    ``randint(11000)`` raised TypeError on every invocation.
    """
    return str(random.randint(1, 1000))

 

def getBigInt():
    """Return a random integer in [1, 1000000], rendered as a decimal string."""
    value = random.randint(1, 1000000)
    return str(value)

 

def getInt():
    """Return a random integer in [1, 1000000], rendered as a decimal string."""
    value = random.randint(1, 1000000)
    return str(value)

 

def getFloat():
    """Return ``random.random()`` scaled by 1000000, rendered with ``str``."""
    scaled = random.random() * 1000000
    return str(scaled)

def getDouble():
    """Return ``random.random()`` scaled by 1000000, rendered with ``str``."""
    scaled = random.random() * 1000000
    return str(scaled)

 

 

 

def getZfill2(item):
    """Render ``item`` with ``str`` and left-pad it with zeros to width 2."""
    text = str(item)
    return text.zfill(2)

def getDate():
    """Return a random date between STARTTIME and ENDTIME as ``YYYY-MM-DD``.

    Month and day are zero-padded to two digits; the year is rendered as-is.
    """
    moment = radar.random_datetime(STARTTIME, ENDTIME)
    parts = (str(moment.year), getZfill2(moment.month), getZfill2(moment.day))
    return "-".join(parts)

     

 

def getTimestamp():
    """Return a random moment between STARTTIME and ENDTIME as ``YYYY-MM-DD HH:MM:SS``.

    Every field except the year is zero-padded to two digits.
    """
    moment = radar.random_datetime(STARTTIME, ENDTIME)
    datePart = "-".join(
        (str(moment.year), getZfill2(moment.month), getZfill2(moment.day))
    )
    timePart = ":".join(
        getZfill2(field) for field in (moment.hour, moment.minute, moment.second)
    )
    return datePart + " " + timePart

 

def getNull():
    """Return the empty string, which is how a null field is written."""
    return str()

 

def genDataFromType(rtype):
    """Generate one field value, as a string, for the given schema type.

    With probability NULLRATE the field is emitted as null (the empty
    string) regardless of its type; otherwise the generator registered for
    ``rtype`` is invoked.

    Raises:
        KeyError: if ``rtype`` has no registered generator.
    """
    # The null roll happens first so that one random draw is always consumed.
    if random.random() < NULLRATE:
        return getNull()

    generators = {
        SchemaType.String: getString,
        SchemaType.Int: getInt,
        # NOTE(review): SmallInt is served by getInt, not getSmallInt.
        SchemaType.SmallInt: getInt,
        SchemaType.BigInt: getBigInt,
        SchemaType.Float: getFloat,
        SchemaType.Double: getDouble,
        SchemaType.Date: getDate,
        SchemaType.Timestamp: getTimestamp,
        SchemaType.Null: getNull,
    }
    producer = generators[rtype]
    return producer()

# Generate one comma-separated data line for a schema.
def genDataFromSchmeTerm(tschemaTerms):
    """Return one row: a generated value per schema entry, joined by commas.

    Args:
        tschemaTerms: object whose ``schemas`` attribute is iterable and whose
            entries expose ``getType()``.
    """
    fields = [genDataFromType(entry.getType()) for entry in tschemaTerms.schemas]
    return ",".join(fields)

 

class edge:
    """A directed edge: start vertex key, end vertex key, and extra attributes."""

    def __init__(self, startVert, endVert, *args):
        """Store both endpoint keys and collect the remaining arguments as attributes.

        The leftover debug ``print`` of the attribute list was removed: it
        fired once per constructed edge and flooded stdout during bulk
        generation.
        """
        self.startVert = startVert
        self.endVert = endVert
        self.data = list(args)

    def __str__(self):
        """Return ``start,end,attr1,...`` as a single comma-separated line.

        Every field goes through ``str`` so that non-string keys or attributes
        do not make ``join`` raise TypeError.
        """
        fields = [self.startVert, self.endVert] + self.data
        return ",".join(str(field) for field in fields)

    def toString(self):
        """Alias for ``str(self)``."""
        return self.__str__()

 

class vert:
    """A vertex identified by ``primaryKey`` and carrying extra attributes.

    Equality and hashing use ``primaryKey`` only, so a set holds at most one
    vertex per key regardless of attributes.
    """

    def __init__(self, primaryKey, *args):
        """Store the key and collect the remaining arguments as attributes."""
        self.primaryKey = primaryKey
        self.attr = list(args)

    def __str__(self):
        """Return ``key,attr1,...`` as a single comma-separated line.

        Every field goes through ``str`` so that non-string keys or attributes
        do not make ``join`` raise TypeError.
        """
        fields = [self.primaryKey] + self.attr
        return ",".join(str(field) for field in fields)

    def toString(self):
        """Alias for ``str(self)``."""
        return self.__str__()

    def __eq__(self, other):
        """Compare by ``primaryKey``; defer to the other operand for foreign types."""
        if not isinstance(other, vert):
            # Returning NotImplemented lets Python fall back to its default
            # comparison instead of raising AttributeError on other.primaryKey.
            return NotImplemented
        return self.primaryKey == other.primaryKey

    def __hash__(self):
        """Hash by ``primaryKey`` so that equal vertices hash alike."""
        return hash(self.primaryKey)

 

class graph:
    """A named graph: an edge list, a de-duplicated vertex set, and a vertex schema."""

    def __init__(self, name, schemas):
        """Create an empty graph.

        Args:
            name: label used in the dumped file names.
            schemas: schema terms used to generate attributes for each vertex.
        """
        self.name = name
        self.edges = []
        self.verts = set()
        self.schemas = schemas

    def add_edge(self, redge):
        """Record an edge and register both of its endpoints as vertices.

        Fresh attribute data is generated for each endpoint. ``set.add`` keeps
        the vertex already stored for a key, so attributes generated for a
        key seen before are discarded.
        """
        self.edges.append(redge)
        self.verts.add(vert(redge.startVert, genDataFromSchmeTerm(self.schemas)))
        self.verts.add(vert(redge.endVert, genDataFromSchmeTerm(self.schemas)))

    def show(self, limit=4):
        """Print at most ``limit`` edges, in insertion order.

        The previous loop header (``for in range(...)``) had no loop variable
        and was a syntax error. A non-positive ``limit`` prints nothing.
        """
        for item in self.edges[:max(limit, 0)]:
            print(item)

    def dump_file(self, fpath):
        """Write ``edge_<name>.csv`` and ``vert_<name>.csv`` into directory ``fpath``.

        Each edge and vertex contributes one line, taken from its
        ``toString()``. Existing files are overwritten; the directory itself
        must already exist.
        """
        with open("{0}/edge_{1}.csv".format(fpath, self.name), "w") as out:
            for item in self.edges:
                out.write(item.toString() + "\n")
        with open("{0}/vert_{1}.csv".format(fpath, self.name), "w") as out:
            for item in self.verts:
                out.write(item.toString() + "\n")

 

if __name__ == "__main__":
    # Vertices get attributes from the seven-column schema; edges get theirs
    # from the five-column schema.
    temptable = graph("table1", getSchema2())
    targetSchema = genSchema1()
    print("targetSchema", targetSchema.toJson())

    # 10000 random edges: start keys come from 0..200, end keys from 201..400,
    # so the two endpoint ranges never overlap.
    for _ in range(10000):
        startVert = str(random.randint(0, 200))
        endVert = str(random.randint(201, 400))
        edgeRow = genDataFromSchmeTerm(targetSchema)
        temptable.add_edge(edge(startVert, endVert, edgeRow))

    # NOTE(review): hard-coded, machine-specific output directory.
    temptable.dump_file("/Users/lixiaomeng/Desktop/huigui/graphdata")

 

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值