[9/Jun/2015:01:58:09 +0800] 日志开始时间
192.168.15.75 访问IP
- 代理IP
1542 responsetime(单位:ms)
"-" referer
GET method
http://www.aliyun.com/index.html 访问url
200 httpcode
191 requestsize(单位:byte)
2830 responsesize(单位:byte)
MISS cache命中状态
Mozilla/5.0(compatible; AhrefsBot/5.0; +http://ahrefs.com/robot/) UA头
text/html 文件类型
文章目录
V1.0
1 数据生成
package com.ruozedata.bigdata.mockData;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.Random;
/**
* @Author: gyz
* @Date: 2020/8/2 9:52
*/
/**
 * Generates mock CDN access-log lines and writes them to data/log/log.txt.
 *
 * Line layout: [timestamp] ip proxyIp responseTime referer "method url"
 * httpCode requestSize responseSize cacheState "ua" "fileType".
 * A small fraction of responseSize values are deliberately dirty ("-" or
 * "脏数据") so downstream ETL jobs have something to clean.
 */
public class MockLogData {

    // Access-log timestamp, e.g. [09/Jun/2015:01:58:09 +0800].
    // "yyyy" replaces the original "yyy" (identical output, conventional spelling).
    private static final SimpleDateFormat SDF =
            new SimpleDateFormat("[dd/MMM/yyyy:HH:mm:ss +0800]", Locale.ENGLISH);

    // Epoch millis advanced by a random 0-3s step per line so timestamps
    // increase monotonically.
    private static Long longLogStartTime = 1590230000000L;

    // Candidate client IPs. Malformed entries in the original data were fixed:
    // "219.142.00" -> "219.142.0.0", "59.56.00" -> "59.56.0.0",
    // "59.172.00" -> "59.172.0.0", "218.201.299.0" (octet > 255) -> "218.201.199.0".
    private static String[] IP = new String[]{
            "202.38.150.0","202.38.170.0","202.97.32.0","219.142.0.0","210.77.32.0",
            "218.30.86.0","124.112.0.0","61.133.128.0","220.178.0.0","60.166.0.0",
            "121.204.0.0","218.66.0.0","59.56.0.0","61.131.0.0","218.86.0.0",
            "202.97.16.0","221.7.62.0","218.30.167.0","61.128.0.0","125.76.0.0",
            "119.0.0.0","114.138.0.0","218.201.199.0","58.42.0.0","61.189.128.0",
            "121.58.0.0","210.168.1.0","121.8.0.0","218.16.0.0","58.60.0.0",
            "116.10.0.0","220.173.0.0","219.148.0.0","221.14.243.0","61.167.6.0",
            "59.172.0.0","61.137.0.0","219.150.0.0","218.4.0.0","59.62.0.0",
            "123.184.0.0","222.74.0.0","122.4.0.0","59.48.0.0","121.59.0.0",
            "61.129.0.0","218.88.0.0","220.128.0.0","58.43.0.0","218.0.0.0",
            "61.138.195.0","124.118.0.0","61.92.0.0","219.151.64.0","60.29.11.0"
    };

    private static String[] METHOD = new String[]{"POST","GET"};

    private static String[] URL = new String[]{
            "http://www.ruozedata.com/","http://www.aliyun.com/index.html","https://www.sina.com.cn/",
            "https://www.hao123.com/","https://www.sohu.com/","https://www.163.com/",
            "https://www.jd.com","https://ai.taobao.com","http://www.hao123.com/tejia",
            "http://www.icbc.com.cn/icbc/","https://www.bilibili.com/"
    };

    // "200" appears three times so roughly 3/5 of the generated codes are successes.
    private static String[] HTTPCODE = new String[]{"200","404","500","200","200"};

    /**
     * Writes 500001 mock log lines to data/log/log.txt, creating the output
     * directory when missing. The writer is closed even if writing fails.
     */
    public static void main(String[] args) throws Exception {
        Random random = new Random();
        File outFile = new File("data/log/log.txt");
        // Create the parent directory so FileOutputStream does not fail on a
        // fresh checkout (the original crashed when data/log was absent).
        File parent = outFile.getParentFile();
        if (parent != null && !parent.exists()) {
            parent.mkdirs();
        }
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(outFile)))) {
            for (int i = 0; i <= 500000; i++) {
                // Log start time: advance the shared clock by up to 3 seconds.
                Long newLonglogStartTime = longLogStartTime + random.nextInt(3000);
                longLogStartTime = newLonglogStartTime;
                String logStartTime = getLogStartTime(SDF, newLonglogStartTime);
                // Client IP
                String ip = IP[random.nextInt(IP.length)];
                // Proxy IP (always absent in mock data)
                String dailiIp = "-";
                // responsetime in ms
                String responsetime = String.valueOf(random.nextInt(100000));
                // referer (always absent, written quoted)
                String referer = "\"-\"";
                // HTTP method
                String method = METHOD[random.nextInt(METHOD.length)];
                // Requested URL
                String url = URL[random.nextInt(URL.length)];
                // HTTP status code
                String httpcode = HTTPCODE[random.nextInt(HTTPCODE.length)];
                // Request size in bytes
                String requestsize = String.valueOf(random.nextInt(1000));
                // Response size in bytes — occasionally dirty on purpose
                String responsesize = getResponseSize(random);
                // Cache hit state
                String cacheState = "MISS";
                // User-Agent header
                String ua = "Mozilla/5.0 (compatible; AhrefsBot/5.0; +http://ahrefs.com/robot/)";
                // Content type
                String fileType = "text/html";
                String result = logStartTime + " " + ip + " " + dailiIp + " " + responsetime + " " + referer + " \"" +
                        method + " " + url + "\" " + httpcode + " " + requestsize + " " + responsesize +
                        " " + cacheState + " \"" + ua + "\" \"" + fileType + "\"";
                writer.write(result);
                writer.newLine();
            }
            writer.flush();
        }
    }

    /** Formats the epoch-millis timestamp with the given log-time format. */
    private static String getLogStartTime(SimpleDateFormat sdf, Long newLonglogStartTime) {
        return sdf.format(new Date(newLonglogStartTime));
    }

    /**
     * Returns a response size: usually a random number below 10000, but about
     * 1 in 20 lines get "-" and a smaller fraction get "脏数据", deliberately
     * producing dirty records for the cleaning step.
     */
    private static String getResponseSize(Random random) {
        String right = String.valueOf(random.nextInt(10000));
        String wrong1 = "-";
        String wrong2 = "脏数据";
        int flag = random.nextInt(1000);
        if (flag % 20 == 0) {
            return wrong1;
        } else if (flag % 21 == 0) {
            return wrong2;
        } else {
            return right;
        }
    }
}
2 ETL
2.1 IP 地址解析
2.1.1 IP地址定位库–ip2region安装使用
- 下载
- 添加maven仓库
<dependency>
<groupId>org.lionsoul</groupId>
<artifactId>ip2region</artifactId>
<version>1.7.2</version>
</dependency>
- 添加ip2region.db
把ip2region.db移动到resource下
2.1.2 ip解析工具类
package utils;
import java.io.File;
import java.lang.reflect.Method;
import org.lionsoul.ip2region.DataBlock;
import org.lionsoul.ip2region.DbConfig;
import org.lionsoul.ip2region.DbSearcher;
import org.lionsoul.ip2region.Util;
/**
 * ip2region lookup helper used by the V1 MapReduce job.
 */
public class IPUtil {
    /**
     * Resolves region information for an IP via the bundled ip2region.db.
     *
     * @param ip dotted-quad IP string to look up
     * @return the region string from ip2region, or null when the db file is
     *         missing, the IP is invalid, or the lookup fails
     */
    public static String getCityInfo(String ip){
        // Resolve the db file shipped on the classpath (src/main/resources).
        String dbPath = IPUtil.class.getResource("/ip2region.db").getPath();
        File file = new File(dbPath);
        if (!file.exists()) {
            System.out.println("Error: Invalid ip2region.db file");
            // Original fell through and attempted the lookup anyway.
            return null;
        }
        if (!Util.isIpAddress(ip)) {
            System.out.println("Error: Invalid ip address");
            // Original only printed and then invoked the search regardless.
            return null;
        }
        // Query algorithm; B-tree here. Alternatives:
        // DbSearcher.BINARY_ALGORITHM (Binary), DbSearcher.MEMORY_ALGORITYM (Memory).
        int algorithm = DbSearcher.BTREE_ALGORITHM;
        DbSearcher searcher = null;
        try {
            DbConfig config = new DbConfig();
            searcher = new DbSearcher(config, dbPath);
            // Pick the search method matching the chosen algorithm.
            Method method = null;
            switch (algorithm) {
                case DbSearcher.BTREE_ALGORITHM:
                    method = searcher.getClass().getMethod("btreeSearch", String.class);
                    break;
                case DbSearcher.BINARY_ALGORITHM:
                    method = searcher.getClass().getMethod("binarySearch", String.class);
                    break;
                case DbSearcher.MEMORY_ALGORITYM:
                    method = searcher.getClass().getMethod("memorySearch", String.class);
                    break;
            }
            if (method == null) {
                return null;
            }
            DataBlock dataBlock = (DataBlock) method.invoke(searcher, ip);
            return dataBlock.getRegion();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Original leaked the searcher's file handle; close it on all paths.
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (Exception ignored) {
                    // best-effort close
                }
            }
        }
        return null;
    }
}
2.2 Mapper
package ipWork;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import utils.IPUtil;
import java.util.regex.Pattern;
public class logMapper extends Mapper<LongWritable, Text, Text, Text> {
private static IPUtil ipUtil;
Text k = new Text();
protected void map(LongWritable key, Text value, Context context) throws java.io.IOException, InterruptedException {
// 1. 获取 1 行数据
String line = value.toString();
//2 切分
String[] fields = line.split(" ");
//3.判断 + ip解析
if(isInteger(fields[9]) == true){
//System.out.println(isInteger(fields[9]));
String result = addcity(fields[2]);
// 4. 写出数据
k.set(result);
context.write(value,k);
}
}
/*
* 2 解析IP
* */
public static String addcity(String ip){
String cityInfo = IPUtil.getCityInfo(ip);
return cityInfo;
}
/* 3 去掉response脏数据
* 判断是否为整数
* @param str 传入的字符串
* @return 是整数返回true,否则返回false
*/
public static boolean isInteger(String str) {
Pattern pattern = Pattern.compile("^[-\\+]?[\\d]*$");
return pattern.matcher(str).matches();
}
}
2.3 Driver
package ipWork;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
/**
 * Driver for the map-only IP-enrichment job (logMapper).
 * Reads data/log.txt, writes to ./out, and exits non-zero on failure.
 */
public class ipDriver {
    public static void main(String[] args) throws URISyntaxException, IOException, InterruptedException, ClassNotFoundException {
        // 0 Input/output locations
        String input = "data/log.txt";
        String output = "out";
        // 1 Build the Job
        Configuration configuration = new Configuration();
        // Remove a stale output directory so re-runs do not fail with
        // FileAlreadyExistsException (the original driver had no cleanup).
        FileSystem fileSystem = FileSystem.get(configuration);
        Path outputPath = new Path(output);
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        Job job = Job.getInstance(configuration);
        // Needed so Hadoop can locate the job jar when submitted to a cluster.
        job.setJarByClass(ipDriver.class);
        // 2 Mapper
        job.setMapperClass(logMapper.class);
        // 3 Final output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        // Map-only job: no reducers
        job.setNumReduceTasks(0);
        // 4 Input and output paths
        FileInputFormat.setInputPaths(job, new Path(input));
        FileOutputFormat.setOutputPath(job, outputPath);
        // 5 Submit; propagate success/failure via the exit code
        // (consistent with ETLDriver2 elsewhere in this project).
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
3 导入到Hive表
- 创建表
-- External table over the cleaned, space-delimited log output.
-- `date` is backtick-quoted because DATE is a reserved keyword in Hive 1.2+.
-- NOTE(review): the UA value itself contains spaces, so it spills into the
-- extra text..text4 columns when split on ' ' — presumably why they exist;
-- confirm the column count against a real output line.
create external table if not exists default.ipaddr(
`date` String,
ip String,
ip_agent String,
responsetime int,
referer String,
method String,
url String,
httpcode int,
requestsize int,
responsesize int,
cache String,
UA String,
text String,
text1 String,
text2 String,
text3 String,
text4 String,
ipaddr array<String>
)
row format delimited fields terminated by ' '
collection items terminated by '|';
- 导入数据
-- Load the three map-only output files from the local filesystem into ipaddr.
load data local inpath '/data/work_0802/part-m-00000' into table ipaddr;
load data local inpath '/data/work_0802/part-m-00001' into table ipaddr;
load data local inpath '/data/work_0802/part-m-00002' into table ipaddr;
3) 检查数据
4 计算KPI
-- KPI: total requestsize per region — ipaddr[2] is the third '|'-separated
-- element of the ip2region result stored in the ipaddr array column.
select ipaddr[2],sum(requestsize) from ipaddr group by ipaddr[2];
V2.0
package ipWork;
import ipWork.Access;
import ipWork.Utils.DateUtil;
import ipWork.Utils.IPUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import utils.FileUtils;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Locale;
import java.util.regex.Pattern;
public class ETLDriver2 {
// Filesystem path of the bundled ip2region.db, resolved through a class in the
// Version1 package. NOTE(review): getResource().getPath() is not a plain file
// path when running from a packed jar — confirm the deployment layout.
private static String path = ipWork.Version1.IPUtil.class.getResource("/ip2region.db").getPath();
public static void main(String[] args) throws Exception {
String input = "data/Log44.txt";
String output = "out";
// 1 Create the Job
Configuration configuration = new Configuration();
// configuration.set(KeyValueLineRecordReader.KEY_VALUE_SEPERATOR,",");
Job job = Job.getInstance(configuration);
// Remove a stale output directory so re-runs do not fail.
// NOTE(review): the import is utils.FileUtils, but the helper defined later in
// this project is ipWork.Utils.FIleUtils (capital I) — verify which exists.
FileUtils.deleteOutput(configuration, output);
// 2 Set the main class
job.setJarByClass(ETLDriver2.class);
// 3 Set the Mapper (map-only job; the reducer below is intentionally unused)
job.setMapperClass(ETLMapper.class);
// job.setReducerClass(MyReducer.class);
// 4 Map output key/value types
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
// 5 Reduce output key/value types (not set: map-only job)
// job.setOutputKeyClass(NullWritable.class);
// job.setOutputValueClass(Access.class);
// 6 Input and output paths
// job.setInputFormatClass(KeyValueTextInputFormat.class);
FileInputFormat.setInputPaths(job, new Path(input));
FileOutputFormat.setOutputPath(job, new Path(output));
// 7 Submit; exit code reflects job success
boolean result = job.waitForCompletion(true);
System.exit(result? 0:1 );
}
/**
 * Parses one raw log line into the reused Access bean, drops dirty rows
 * (non-numeric responsesize, intranet IPs), and emits Access.toString().
 */
public static class ETLMapper extends Mapper<LongWritable,Text,Text, NullWritable> {
Access access = new Access();
Text text = new Text();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
//[18/Aug/2020:14:18:57 +0800] 125.76.0.0 - 23293 "-" "GET http://www.hao123.com/tejia/?ab=3&c=2&d=4" 200 573 6959 MISS "Mozilla/5.0 (compatible; AhrefsBot/5.0; +http://ahrefs.com/robot/)" "text/html"
// NOTE(review): the sample line above is space-separated and the V2 mock
// generator writes comma-separated fields, yet this splits on tab — confirm
// the actual delimiter of the real input files.
String[] splits = value.toString().split("\t");
// Convert the bracketed log timestamp into "yyyy-MM-dd HH:mm:ss"
SimpleDateFormat sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
SimpleDateFormat sdf2 =new SimpleDateFormat("[dd/MMM/yyy:HH:mm:ss +0800]", Locale.ENGLISH);
try {
String date = DateUtil.parseDate(sdf2,sdf1,splits[0]);
access.setStartTime(date);
} catch (ParseException e) {
// NOTE(review): on parse failure the row is still emitted carrying the
// startTime left over from the previous line (Access is reused) — confirm.
e.printStackTrace();
}
access.setIp(splits[1]);
access.setProxyIp(splits[2]);
access.setResponsetime(Integer.parseInt(splits[3]));
access.setReferer(splits[4]);
access.setMethod(splits[5].replace("\"",""));
access.setUrl(splits[6].replace("\"",""));
access.setHttpcode(Integer.parseInt(splits[7]));
access.setRequestsize(Integer.parseInt(splits[8]));
String Responsesize = splits[9];
if(Pattern.matches("[0-9]+",Responsesize)){
// Keep only numeric responsesize values
access.setResponsesize(Integer.parseInt(splits[9]));
}else {
// Drop rows with dirty responsesize ("-" or "脏数据")
return;
}
access.setCache(splits[10]);
access.setUa(splits[11]);
access.setFileType(splits[12]);
// Resolve region info for the client IP via ip2region
String ipMessage = IPUtil.parseIP(access.getIp(), path);
//0|0|0|0|内网IP|内网IP|16392
if(ipMessage.contains("内网IP")){
// Drop intranet addresses
return;
}
//995|中国|0|上海|上海市|电信|125682
String[] iPsplit = ipMessage.split("\\|");
access.setCountry(iPsplit[1]);
access.setProvince(iPsplit[3]);
access.setCity(iPsplit[4]);
access.setOperator(iPsplit[5]);
text.set(access.toString());
context.write(text,NullWritable.get());
}
}
}
package ipWork;
import java.io.*;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.Random;
/**
 * V2 mock log generator: comma-separated lines with a URL query-string suffix,
 * HIT/MISS cache states and deliberately dirty responsesize values.
 */
public class MockLogData {

    // Timestamp format. Fixed "dd/MM/yyy" -> "dd/MMM/yyyy": the ETL parser
    // (ETLDriver2, pattern "[dd/MMM/yyy:HH:mm:ss +0800]") and its sample line
    // "[18/Aug/2020:...]" both expect an English month abbreviation, which the
    // original numeric MM month could never satisfy.
    private static final SimpleDateFormat SDF =
            new SimpleDateFormat("[dd/MMM/yyyy:HH:mm:ss +0800]", Locale.ENGLISH);

    // Candidate client IPs. Malformed entries fixed: "219.142.00" ->
    // "219.142.0.0", "59.56.00" -> "59.56.0.0", "59.172.00" -> "59.172.0.0".
    private static String[] IP = new String[]{
            "202.38.150.0","202.38.170.0","202.97.32.0","219.142.0.0","210.77.32.0",
            "218.30.86.0","124.112.0.0","61.133.128.0","220.178.0.0","60.166.0.0",
            "121.204.0.0","218.66.0.0","59.56.0.0","61.131.0.0","218.86.0.0",
            "202.97.16.0","221.7.62.0","218.30.167.0","61.128.0.0","125.76.0.0",
            "119.0.0.0","114.138.0.0","58.42.0.0","61.189.128.0",
            "121.58.0.0","210.168.1.0","121.8.0.0","218.16.0.0","58.60.0.0",
            "116.10.0.0","220.173.0.0","219.148.0.0","221.14.243.0","61.167.6.0",
            "59.172.0.0","61.137.0.0","219.150.0.0","218.4.0.0","59.62.0.0",
            "123.184.0.0","222.74.0.0","122.4.0.0","59.48.0.0","121.59.0.0",
            "61.129.0.0","218.88.0.0","220.128.0.0","58.43.0.0","218.0.0.0",
            "61.138.195.0","124.118.0.0","61.92.0.0","219.151.64.0","60.29.11.0"
    };

    private static String[] METHOD = new String[]{"POST","GET"};

    private static String[] URL = new String[]{
            "http://www.913Jerry.com/","https://www.913Jerrysina.com.cn/",
            "https://www.913Jerryhao123.com/","https://www.913Jerrysohu.com/","https://www.913Jerry163.com/",
            "https://www.913Jerryjd.com/","https://913Jerry.ai.taobao.com/","http://www.913Jerryhao123.com/tejia/",
            "http://www.913Jerrybaidu.com.cn/icbc/","https://www.913Jerrybilibili.com/"
    };

    // Query-string suffixes appended to the base URL.
    private static String[] URL_SUFFIX = new String[]{
            "?ab=3&c=2&d=4","?m=7&method=ff","a=5&b=2&c=3",
            "?auth=afe&file=de","?htt=f7","?y=8&u=6"
    };

    private static String[] CACHE = new String[]{"MISS","HIT"};

    // "200" appears three times so roughly 3/5 of the codes are successes.
    private static String[] HTTPCODE = new String[]{"200","404","500","200","200"};

    /**
     * Writes 1001 comma-separated mock log lines to the fixed output file,
     * creating the directory when missing. The writer is closed on all paths.
     */
    public static void main(String[] args) throws IOException {
        Random random = new Random();
        File outFile = new File("D:\\workplace\\out\\Log\\Log.txt");
        // Create the parent directory so FileOutputStream does not fail.
        File parent = outFile.getParentFile();
        if (parent != null && !parent.exists()) {
            parent.mkdirs();
        }
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(outFile)))) {
            //500000
            for (int i = 0; i <= 1000; i++) {
                // Log start time (now)
                String logStartTime = SDF.format(new Date());
                // Client IP
                String ip = IP[random.nextInt(IP.length)];
                // Proxy IP (always absent)
                String proxyIP = "-";
                // responsetime in ms
                String responsetime = String.valueOf(random.nextInt(100000));
                // referer (always absent, quoted)
                String referer = "\"-\"";
                // HTTP method
                String method = METHOD[random.nextInt(METHOD.length)];
                // URL with a random query-string suffix
                String url = URL[random.nextInt(URL.length)] + URL_SUFFIX[random.nextInt(URL_SUFFIX.length)];
                // HTTP status code
                String httpcode = HTTPCODE[random.nextInt(HTTPCODE.length)];
                // Request size in bytes
                String requestsize = String.valueOf(random.nextInt(1000));
                // Response size — dirty for a large prefix of the run (see helper)
                String responsesize = getResponseSize(random, i);
                // Cache hit state
                String cacheState = CACHE[random.nextInt(CACHE.length)];
                // User-Agent header
                String ua = "Mozilla/5.0 (compatible; AhrefsBot/5.0; +http://ahrefs.com/robot/)";
                // Content type
                String fileType = "text/html";
                String result = logStartTime + "," + ip + "," + proxyIP + "," + responsetime + "," + referer + ",\"" +
                        method + "," + url + "\"," + httpcode + "," + requestsize + "," + responsesize +
                        "," + cacheState + ",\"" + ua + "\",\"" + fileType + "\"";
                writer.write(result);
                writer.newLine();
            }
            writer.flush();
        }
    }

    /**
     * Returns the responsesize for iteration {@code i}: every line before
     * i == 914 is "脏数据", afterwards about 1 in 20 lines get "-", the rest a
     * random number below 10000.
     * NOTE(review): the i &lt; 914 cutoff makes over 90% of a 1000-line run
     * dirty — presumably intentional for testing the cleaner; confirm.
     */
    private static String getResponseSize(Random random, int i) {
        String right = String.valueOf(random.nextInt(10000));
        String wrong1 = "-";
        String wrong2 = "脏数据";
        int flag = random.nextInt(1000);
        if (i < 914) {
            return wrong2;
        } else if (i >= 913 && flag % 20 == 0) {
            return wrong1;
        } else {
            return right;
        }
    }
}
utils
package ipWork.Utils;
//时间字符串转换
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * Helper for re-formatting a date string from one pattern to another.
 */
public class DateUtil {

    /**
     * Parses {@code date} with {@code oldFomat} and renders the resulting
     * instant with {@code newFomat}.
     *
     * @param oldFomat format matching the incoming string
     * @param newFomat format to render the result with
     * @param date     the date string to convert
     * @return the same instant rendered in the new format
     * @throws ParseException if {@code date} does not match {@code oldFomat}
     */
    public static String parseDate(SimpleDateFormat oldFomat, SimpleDateFormat newFomat, String date) throws ParseException {
        final Date instant = oldFomat.parse(date);
        return newFomat.format(instant);
    }
}
package ipWork.Utils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
/**
 * HDFS helper for clearing a job's output directory before submission.
 * NOTE(review): the class name "FIleUtils" (capital I) does not match the
 * "utils.FileUtils" import used by ETLDriver2 — confirm and unify the name.
 */
public class FIleUtils {
    /**
     * Recursively deletes {@code output} if it already exists, so a re-run
     * does not fail with FileAlreadyExistsException.
     *
     * @param configuration Hadoop configuration used to obtain the FileSystem
     * @param output        output path to clear
     * @throws IOException on filesystem errors
     */
    public static void deleteOutput(Configuration configuration, String output) throws IOException {
        FileSystem fileSystem = FileSystem.get(configuration);
        Path path = new Path(output);
        if (fileSystem.exists(path)) {
            fileSystem.delete(path, true);
        }
        // Deliberately NOT closing here: FileSystem.get returns a JVM-wide
        // cached instance, and the original close() broke later users (e.g.
        // the MapReduce job itself) with "Filesystem closed" errors.
    }
}
package ipWork.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.lang.reflect.Method;
import org.lionsoul.ip2region.DataBlock;
import org.lionsoul.ip2region.DbConfig;
import org.lionsoul.ip2region.DbSearcher;
import org.lionsoul.ip2region.Util;
/**
 * ip2region lookup helper used by the V2 ETL job.
 */
public class IPUtil {
    private static Logger logger = LoggerFactory.getLogger(IPUtil.class);

    /**
     * Resolves region info for an IP using the ip2region db at {@code path}.
     *
     * @param ip   dotted-quad IP string
     * @param path filesystem path of ip2region.db
     * @return the DataBlock string, e.g. "995|中国|0|上海|上海市|电信|125682",
     *         or null when the path/IP is invalid or the lookup fails
     */
    public static String parseIP(String ip, String path) {
        if (null == path) {
            System.out.println("| Usage: java -jar ip2region-{version}.jar [ip2region db file]");
            // Was System.exit(0): a library lookup must never kill the JVM
            // (it would abort the whole MapReduce task).
            return null;
        }
        File file = new File(path);
        if (!file.exists()) {
            System.out.println("Error: Invalid ip2region.db file");
            return null;
        }
        if (!Util.isIpAddress(ip)) {
            logger.info("Error: Invalid ip address");
            logger.info("Invalid ip : " + ip);
            // Was: logged but still attempted the lookup on the bad input.
            return null;
        }
        // Algorithm could be parameterized; hard-coded to B-tree here.
        int algorithm = DbSearcher.BTREE_ALGORITHM;
        String algoName = "B-tree";
        if (algoName.equalsIgnoreCase("binary")) {
            algorithm = DbSearcher.BINARY_ALGORITHM;
        } else if (algoName.equalsIgnoreCase("memory")) {
            algorithm = DbSearcher.MEMORY_ALGORITYM;
        }
        DbSearcher searcher = null;
        try {
            DbConfig config = new DbConfig();
            searcher = new DbSearcher(config, path);
            // Pick the search method matching the chosen algorithm.
            Method method = null;
            switch (algorithm) {
                case DbSearcher.BTREE_ALGORITHM:
                    method = searcher.getClass().getMethod("btreeSearch", String.class);
                    break;
                case DbSearcher.BINARY_ALGORITHM:
                    method = searcher.getClass().getMethod("binarySearch", String.class);
                    break;
                case DbSearcher.MEMORY_ALGORITYM:
                    method = searcher.getClass().getMethod("memorySearch", String.class);
                    break;
            }
            if (method == null) {
                return null;
            }
            DataBlock dataBlock = (DataBlock) method.invoke(searcher, ip);
            logger.info("dataBlock: " + dataBlock.toString());
            return dataBlock.toString();
        } catch (Exception e) {
            e.printStackTrace();
            // Was: fell through to dataBlock.toString() and threw an NPE
            // whenever the lookup itself failed.
            return null;
        } finally {
            // Close the searcher on every path (original leaked it on errors).
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (Exception ignored) {
                    // best-effort close
                }
            }
        }
    }
}
2.2 打包到Linux
2.3 sh 执行
步骤1:生成日志文件:com.ruoze.mocklog.MockLogData 脚本 daily_mockLog.sh
步骤2:日志文件加载到hdfs 脚本 daily_putFileToHDFS.sh
步骤3:mr清洗数据 com.ruoze.mr.LogCleanDriver 脚本 daily_mr_cleanLog.sh
步骤4:hive表加载数据 脚本 daily_ods_load_data.sh
步骤5:select province, sum(requestsize), count(1) from lixian1.ods_log where month = ‘202008’ and day = ‘01’ group by province;
daily_mockLog.sh
#!/bin/bash
#---------------------------------------------
#FileName: daily_mockLog.sh
#Version: 1.0
#Date: 2020-09-05
#Author: ifeng
#Description: Generate mock log data. With no argument the output file is
#             named after yesterday's date; an argument overrides the name.
#Notes: project ....
#---------------------------------------------
#set -u

# Default log name: yesterday's date plus a ".log" suffix.
DATE=$(date -d '-1 day' +"%Y%m%d").log
# A non-empty first positional argument replaces the default name.
if [[ -n "$1" ]]; then
  DATE=$1
fi

# Run the generator from the project jar; the file is written by the Java code.
java -cp /data/Log_mr/com.MapReduce-1.0.jar com.MapReduce.mockLog.MockLogData ${DATE}
daily_putFileToHDFS.sh
#!/bin/bash
#---------------------------------------------
#FileName: daily_putFileToHDFS.sh
#Version: 1.0
#Date: 2020-09-05
#Author: ifeng
#Description: Put the generated mock log file onto HDFS. A file name may be
#             passed as the first argument; default is yesterday's log file.
#Notes: project ....
#---------------------------------------------
#set -u

# Default file name: yesterday's date plus a ".log" suffix.
FILENAME=$(date -d '-1 day' +"%Y%m%d").log
# A non-empty first positional argument replaces the default name.
if [[ -n "$1" ]]; then
  FILENAME=$1
fi

# Local file lives in /data/Log_mr; HDFS target directory: /project1/input/log
hdfs dfs -put /data/Log_mr/${FILENAME} /project1/input/log
daily_mr_cleanLog.sh
#!/bin/bash
#---------------------------------------------
#FileName: daily_mr_cleanLog.sh
#Version: 1.0
#Date: 2020-08-06
#Author: GYZ
#Description: Run the MR cleaning job. Cleans yesterday's file by default;
#             a file name may be passed as the first argument.
#Notes: project ....
#---------------------------------------------
#set -u

# Default file name: yesterday's date plus a ".log" suffix.
DATE=$(date -d '-1 day' +"%Y%m%d").log
# A non-empty first positional argument replaces the default name.
if [[ -n "$1" ]]; then
  DATE=$1
fi

# Ship the ip2region jar with the job (-libjars) and put it on the client classpath.
export LIBJARS=/data/Log_mr/ip2region-1.7.jar
export HADOOP_CLASSPATH=/data/Log_mr/ip2region-1.7.jar

hadoop jar /home/ruoze/lib/lixian-project1-1.0.jar com.MapReduce.mr.LogCleanDriver -libjars ${LIBJARS} /project1/input/log/${DATE} /project1/output/log/${DATE}