这个是《Python核心编程》里面的一个例子，修改了一下，改用文件来存取数据：
# -*- coding: utf8 -*-
from random import randint,choice
from string import lowercase
from sys import maxint
from time import ctime
from os import path
from re import match,search
def createdata(path):
    """Append 5 to 10 random test records to the file at *path*.

    Each record is a single line of the form::

        <ctime string>::<login>@<domain>.<tld>::<timestamp>-<login len>-<domain len>

    where ``login`` is 4-7 random lowercase ASCII letters, ``domain`` is
    between ``len(login)`` and 12 random lowercase ASCII letters, and
    ``tld`` is one of com/edu/net/org/gov.

    The file is opened in append mode, so existing content is kept, and it
    is closed even if writing fails.

    Note: inside this function the parameter name ``path`` shadows the
    module-level ``os.path`` import; it is kept for caller compatibility.

    :param path: file system path of the data file to append to.
    """
    # Local import: ascii_lowercase is locale-independent and is available
    # on both Python 2 and Python 3 (string.lowercase is neither).
    from string import ascii_lowercase

    doms = ('com', 'edu', 'net', 'org', 'gov')
    # ctime() raises for timestamps the platform cannot convert, which is
    # what most values below sys.maxint are on a 64-bit build. Cap the
    # range at the 32-bit signed limit (equal to maxint - 1 on 32-bit).
    max_timestamp = 2 ** 31 - 2

    print(path)
    with open(path, 'a') as data_file:
        # Write a random number (5 to 10) of records.
        for _ in range(randint(5, 10)):
            dtint = randint(0, max_timestamp)
            # Convert the timestamp to its human-readable string form.
            dtstr = ctime(dtint)

            # Part before the '@'.
            shorter = randint(4, 7)
            em = ''.join(choice(ascii_lowercase) for _ in range(shorter))

            # Part after the '@'; never shorter than the login.
            longer = randint(shorter, 12)
            dn = ''.join(choice(ascii_lowercase) for _ in range(longer))

            data_file.write('%s::%s@%s.%s::%d-%d-%d\n' % (
                dtstr, em, dn, choice(doms), dtint, shorter, longer))
# Regular expression for an e-mail address: a login that starts and ends
# with a letter (optionally containing one dot), an '@', one or more
# dot-terminated domain labels, and a top-level domain of 2+ letters.
_EMAIL_PATT = r'[a-zA-Z](([a-zA-Z0-9]*\.[a-zA-Z0-9]*)|[a-zA-Z0-9]*)[a-zA-Z]@([a-z0-9A-Z]+\.)+[a-zA-Z]{2,}'


def _find_email(line):
    """Return the first e-mail address found in *line*, or None if absent."""
    found = search(_EMAIL_PATT, line)
    return found.group() if found else None


if __name__ == '__main__':
    # If the data file exists, print every e-mail address found in it;
    # otherwise generate the data file so the next run has input to read.
    datapath = 'c://Users//xiaoyizong//Desktop//re_data_test'
    if path.exists(datapath):
        print(' now read data!!!!')
        with open(datapath) as re_data_test:
            for line in re_data_test:
                email = _find_email(line)
                # Lines without an address (e.g. blank lines) are skipped
                # instead of crashing on a None match object.
                if email is not None:
                    print(email)
    else:
        print(' data not exist and now create data')
        createdata(datapath)
生成的数据样式:
运行的结果是找出源数据里面的所有邮箱: