# coding=UTF-8
import random  # random sampling of log fields
import time    # timestamp formatting

# Candidate URL paths used to build fake request lines.
url_paths = [
    "class/112.html",
    "class/128.html",
    "class/145.html",
    "class/146.html",
    "class/131.html",
    "class/130.html",
    "learn/821",
    "course/list",
]

# Building blocks for random IPv4 addresses.
# NOTE(review): the original list contained 552, which is not a valid
# IPv4 octet (must be 0-255); replaced with 55.
ip_slices = [132, 156, 124, 10, 29, 167, 143, 187, 30, 46, 55, 63, 72, 98, 168]

# Referer URL templates; {query} is filled with a search keyword.
http_referers = [
    "https://www.baidu.com/s?wd={query}",
    "https://www.sogou.com/web?query={query}",
    "http://cn.bing.com/search?q={query}",
    "http://search.yahoo.com/search?p={query}",
]

# Search keywords substituted into the referer templates.
search_keywords = [
    "SparkSQL实战",
    "Hadoop 基础",
    "Storm实战",
    "Spark_Streaming实战",
    "大数据面试",
    "Flink 实战",
]

# HTTP status codes to sample from.
status_codes = ["200", "404", "500", "300"]
# Return one URL path chosen uniformly at random from url_paths.
def sample_url():
    return random.choice(url_paths)
# Build a random IPv4-style address string by joining four distinct
# octets sampled (without replacement) from ip_slices.
def sample_ip():
    # Original used typographic quotes (“.”), a SyntaxError, and shadowed
    # the builtin name `slice`.
    octets = random.sample(ip_slices, 4)
    return ".".join(str(octet) for octet in octets)
# Return a referer field for one log line.
# About 80% of the time returns "-" (no referer); otherwise fills a
# random referer template with a random search keyword.
def sample_referer():
    if random.uniform(0, 1) > 0.2:
        return "-"
    template = random.choice(http_referers)
    keyword = random.choice(search_keywords)
    return template.format(query=keyword)
# Return one HTTP status code string chosen uniformly at random.
def sample_status_code():
    return random.choice(status_codes)
# Generate `count` fake access-log lines (default 10), print each one and
# write them to `log_path` (new backward-compatible parameter; defaults to
# the original hard-coded path).
# Line format: ip <TAB> time <TAB> "GET /url HTTP/1.1" <TAB> status <TAB> referer
def generate_log(count=10, log_path="/Users/liujingmao/data/access.log"):
    # One timestamp per batch, matching the original behavior.
    time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    # "w" truncates the file; the context manager guarantees the handle is
    # closed (the original opened with "w+" and never closed it).
    with open(log_path, "w") as f:
        for _ in range(count):
            query_log = "{ip}\t{localtime}\t\"GET /{url} HTTP/1.1\"\t{status_code}\t{referer}".format(
                url=sample_url(),
                ip=sample_ip(),
                referer=sample_referer(),
                status_code=sample_status_code(),
                localtime=time_str,
            )
            print(query_log)
            f.write(query_log + "\n")
# Script entry point: generate 100 log lines.
if __name__ == '__main__':
    generate_log(100)
# Usage: python /Users/liujingmao/data/generate_log.py