Flume学习笔记
一作业
1、题目
注:步骤2到步骤5需通过Linux的crontab来运行,不得单独用命令来执行
作业提示:写个*.sh脚本,运行此脚本。全部命令直接依次跑完 结果展示:a.截图脚本 b.截图hdfs的homework 1).上传此access20180530.log到/opt/logs中 2).自己编写flume配置文件传输access20180530.log到hdfs的/logs中 3).编写MR程序过滤此日志,过滤后数据仅需要IP,时间和url。
注:过滤了静态资源GET /static/和 尾段 HTTP/1.1
4).输出到hdfs的/homework中 5).下载此日志到linux的/opt/homework目录中 附件: access201805_30.log(58.25 M)
过滤后到数据结果图{width=“5.833333333333333in” height=“3.384433508311461in”}
2、编写MR代码对日志进行过滤
a). LogMapper
package LogBean;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper that parses one raw access-log line into "ip \t time \t url",
 * dropping static-resource requests and stripping the HTTP method prefix
 * and the trailing "HTTP/1.1" token from the URL.
 *
 * Input:  (file offset, raw log line)
 * Output: (same file offset, tab-separated "ip \t logtime \t url")
 */
public class LogMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
    /** Trailing protocol token removed from request strings. */
    private static final String HTTP_SUFFIX = "HTTP/1.1";
    // Reused across map() calls — standard Hadoop object-reuse pattern.
    LogBean logBean = new LogBean();
    Text k = new Text();

    @Override
    protected void map(LongWritable key,
                       Text value,
                       Context context)
            throws IOException, InterruptedException {
        // 1. Text ===> String
        String line = value.toString();
        // fields = {ip, logtime, url, status, traffic}; only the first three are used here.
        String[] fields = logBean.parse(line);
        String ip = fields[0];
        String logtime = fields[1];
        String url = fields[2];
        // Filter out static resources and uc_server requests entirely.
        if (url.startsWith("GET /static") || url.startsWith("GET /uc_server")) {
            return;
        }
        // Strip "GET /" or "POST /" prefix plus the trailing "HTTP/1.1" token.
        url = stripMethod(url, "GET ");
        url = stripMethod(url, "POST ");
        k.set(ip + "\t" + logtime + "\t" + url);
        context.write(key, k);
    }

    /**
     * Removes the "<method> /" prefix and the trailing "HTTP/1.1" suffix.
     * BUG FIX: the original substring() had no length guard, so a request
     * string shorter than prefix + suffix (e.g. a truncated line) threw
     * StringIndexOutOfBoundsException and killed the whole map task. Such
     * strings are now returned unchanged instead.
     */
    private static String stripMethod(String url, String method) {
        int start = method.length() + 1;                 // +1 also drops the leading '/'
        int end = url.length() - HTTP_SUFFIX.length();
        if (url.startsWith(method) && start <= end) {
            return url.substring(start, end);
        }
        return url;
    }
}
b). LogReducer
package LogBean;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Identity-style reducer: emits every parsed record as the output key with a
 * NullWritable value, producing one tab-separated line per record in the
 * final part file.
 */
public class LogReducer extends Reducer<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void reduce(LongWritable key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // The record itself becomes the output key; NullWritable suppresses
        // the value column so only the record text reaches the output file.
        for (Text record : values) {
            context.write(record, NullWritable.get());
        }
    }
}
c).LogBean
package LogBean;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Locale;
public class LogBean {
public static SimpleDateFormat FORMAT = new SimpleDateFormat("d/MMM/yyyy:HH:mm:ss", Locale.ENGLISH);
public static SimpleDateFormat dateformat1 =new SimpleDateFormat("yyyyMMddHHmmss");
public String[] parse(String line) {
String ip = parseIP(line);
String logtime;
try{
logtime = parseTime(line);
}catch (Exception e){
logtime = "null";
}
String url;
try{
url = parseURL(line);
}catch (Exception e){
url = "null";
}
String status = parseStatus(line);
String traffic = parseTraffic(line);
return new String[]{ip,logtime,url,status,traffic};
}
private String parseTraffic(String line){
String trim = line.substring(line.lastIndexOf("\"")+1).trim();
String traffic = trim.split(" ")[1];
return traffic;
}
private String parseStatus(String line){
String trim;
try{
trim = line.substring(line.lastIndexOf("\"")+1).trim();
}catch (Exception e){
trim = "null";
}
String status = trim.split(" ")[0];
return status;
}
private String parseURL(String line){
int first = line.indexOf("\"");
int last = line.lastIndexOf("\"");
String url = line.substring(first+1,last);
return url;
}
private String parseTime(String line){
int first = line.indexOf("[");
int last = line.indexOf("+0800]");
String logtime = line.substring(first+1,last).trim();
try{
return dateformat1.format(FORMAT.parse(logtime));
}catch (ParseException e){
e.printStackTrace();
}
return "";
}
private String parseIP(String line){
String ip = line.split("- -")[0].trim();
return ip;
}
}
d).LogDriver
package LogBean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for the log-filtering MapReduce job.
 *
 * Usage: hadoop jar LogBean-1.0-SNAPSHOT.jar LogBean.LogDriver <input> <output>
 *   args[0] — HDFS input path (the Flume-uploaded log)
 *   args[1] — HDFS output path (must not already exist)
 */
public class LogDriver {
    public static void main(String[] args) throws Exception {
        // 1. Job/configuration setup
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // 2. Jar that contains the mapper/reducer classes
        job.setJarByClass(LogDriver.class);
        // 3. Mapper and its intermediate key/value types
        job.setMapperClass(LogMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        // 4. Reducer and final output types
        job.setReducerClass(LogReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        // 5. Input and output paths from the command line
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // 6. Submit and propagate success/failure to the shell.
        //    BUG FIX: the boolean result was previously discarded, so the
        //    calling script could never detect a failed job via exit status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
e).将以上程序打包成jar
在本地调试代码成功
![](D:\A-Programming\BAT\实验结果图片\flume (7).PNG)
查看输出结果
![](D:\A-Programming\BAT\实验结果图片\flume (1).PNG)
打包前先clean
![](D:\A-Programming\BAT\实验结果图片\flume (1).jpg)
![](D:\A-Programming\BAT\实验结果图片\flume (2).jpg)
f).执行jar包命令
这样就得到可以在hadoop集群中处理日志的MR的jar包了,单独在linux里头的执行命令如下,"$"后面跟的都是对应路径变量。
$HADOOP_CMD jar $STREAM_JAR_PATH LogBean.LogDriver $INPUT_FILE_PATH $OUTPUT_PATH
示例:
hadoop jar /opt/ETL/LogBean-1.0-SNAPSHOT.jar LogBean.LogDriver /flume/logs/Logg.txt /homework
$FLUME_NG agent --conf $CONF --name a2 --conf-file $FLUME_HDFS -Dflume.root.logger=INFO,console
/usr/local/src/flume1.8.0/bin/flume-ng agent --conf /usr/local/src/flume1.8.0/conf/ --name a2 --conf-file /opt/ETL/flume-hdfs.conf -Dflume.root.logger=INFO,console
3、脚本
3.1Flume配置文件
flume-hdfs.conf
#### #1. Agent name: a2 (must match the --name flag passed to flume-ng)
a2.sources = r2
a2.sinks = k2
a2.channels = c2
#### #2. Source: exec — stream the log file once via cat (exits when the file ends)
a2.sources.r2.type = exec
a2.sources.r2.command = cat /opt/logs/access_2018_05_30.log
a2.sources.r2.shell = /bin/bash -c
#### #3. Sink: HDFS
a2.sinks.k2.type = hdfs
a2.sinks.k2.hdfs.path = hdfs://master:9000/flume/
# Plain-text file type (DataStream also supports compression if configured)
a2.sinks.k2.hdfs.fileType = DataStream
a2.sinks.k2.hdfs.writeFormat = Text
# Number of events buffered before one flush to HDFS
a2.sinks.k2.hdfs.batchSize = 1000
# Minimum HDFS replica count required before a write is considered complete
a2.sinks.k2.hdfs.minBlockReplicas = 2
# Roll (close + rename) the output file every 120 seconds
a2.sinks.k2.hdfs.rollInterval = 120
a2.sinks.k2.hdfs.appendTimeout = 1000
# Size-based rolling threshold — set huge (~280 GB) so it effectively never triggers
a2.sinks.k2.hdfs.rollSize = 280971520000
# Event-count rolling threshold — set high so count-based rolling rarely triggers
# (NOTE: the original comment claimed rolling was unrelated to event count;
# rollCount IS a count-based roll trigger — use 0 to disable it entirely)
a2.sinks.k2.hdfs.rollCount = 600000
a2.sinks.k2.hdfs.txnEventMax = 100000
a2.sinks.k2.hdfs.threadsPoolSize = 100
#### #4. Channel: in-memory buffer between source and sink
a2.channels.c2.type = memory
a2.channels.c2.capacity = 1000
a2.channels.c2.transactionCapacity = 1000
a2.channels.c2.byteCapacity = 800000
#### #5. Wiring: attach the source and the sink to channel c2
a2.sources.r2.channels = c2
a2.sinks.k2.channel = c2
3.2 sh脚本
HADOOP_CMD hadoop 安装路径如"/usr/local/src/hadoop-2.9.0/bin/hadoop"
STREAM_JAR_PATH 前面所生成LogBean-1.0-SNAPSHOT.jar的路径
INPUT_FILE_PATH 要处理的日志存在于HDFS上的路径
OUTPUT_PATH 过滤日志结果在HDFS上的输出路径
DOWN_LOAD_PATH 下载HDFS输出结果到本地的目录(/opt/homework)
#!/bin/bash
# ETL driver: clean the HDFS dirs, ship the access log to HDFS via Flume,
# filter it with the MapReduce job, and pull the result to the local disk.
# Intended to be run unattended (e.g. from crontab).

# 1. Define the paths
HADOOP_CMD="/usr/local/src/hadoop-2.9.0/bin/hadoop"
STREAM_JAR_PATH="/opt/ETL/LogBean-1.0-SNAPSHOT.jar"
INPUT_FILE_PATH="/flume"
OUTPUT_PATH="/homework"
DOWN_LOAD_PATH="/opt/homework"
LOG_DIR="/opt/logs"
FILE=access_2018_05_30.log
FLUME_NG="/usr/local/src/flume1.8.0/bin/flume-ng"
CONF="/usr/local/src/flume1.8.0/conf/"
FLUME_HDFS="/opt/ETL/flume-hdfs.conf"

# 2. Delete the input and output paths on HDFS
#    (-rm -r replaces the deprecated -rmr; "No such file" on first run is harmless)
"$HADOOP_CMD" fs -rm -r -skipTrash "$INPUT_FILE_PATH"
"$HADOOP_CMD" fs -rm -r -skipTrash "$OUTPUT_PATH"

# 3. Upload data to HDFS through Flume.
#    BUG FIX: the agent was previously started in the foreground, so it never
#    returned and steps 4-5 were unreachable without a manual Ctrl+C. Run it
#    in the background, wait for the sink to roll (close + rename) its file,
#    then stop the agent.
"$FLUME_NG" agent --conf "$CONF" --name a2 --conf-file "$FLUME_HDFS" \
    -Dflume.root.logger=INFO,console &
FLUME_PID=$!
# A finished upload shows up as FlumeData.<timestamp> (no .tmp suffix);
# rollInterval in flume-hdfs.conf is 120s, so this normally waits ~2-3 min.
until "$HADOOP_CMD" fs -ls "$INPUT_FILE_PATH" 2>/dev/null | grep -q 'FlumeData\.[0-9]*$'; do
    sleep 10
done
kill "$FLUME_PID"

# 4. Run the jar package and download the result
"$HADOOP_CMD" jar "$STREAM_JAR_PATH" LogBean.LogDriver "$INPUT_FILE_PATH" "$OUTPUT_PATH" || exit 1
rm -rf "$DOWN_LOAD_PATH"
"$HADOOP_CMD" fs -get "$OUTPUT_PATH" "$DOWN_LOAD_PATH"
3.3运行脚本
bash run.sh
[root@master ETL]# bash run.sh
rmr: DEPRECATED: Please use '-rm -r' instead.
rmr: `/flume': No such file or directory
rmr: DEPRECATED: Please use '-rm -r' instead.
rmr: `/homework': No such file or directory
Info: Sourcing environment configuration script /usr/local/src/flume1.8.0/conf/flume-env.sh
Info: Including Hadoop libraries found via (/usr/local/src/hadoop-2.9.0/bin/hadoop) for HDFS access
Info: Including HBASE libraries found via (/usr/local/src/hbase-0.98.6-hadoop2/bin/hbase) for HBASE access
Info: Including Hive libraries found via (/usr/local/src/hive1.2.2) for Hive access
+ exec /usr/local/src/jdk1.8.0_202/bin/java -Xmx20m -Dflume.root.logger=INFO,console -cp '/usr/local/src/flume1.8.0/conf:/usr/local/src/flume1.8.0/lib/*:/usr/local/src/hadoop-2.9.0/etc/hadoop:/usr/local/src/hadoop-2.9.0/share/hadoop/common/lib/*:/usr/local/src/hadoop-2.9.0/share/hadoop/common/*:/usr/local/src/hadoop-2.9.0/share/hadoop/hdfs:/usr/local/src/hadoop-2.9.0/share/hadoop/hdfs/lib/*:/usr/local/src/hadoop-2.9.0/share/hadoop/hdfs/*:/usr/local/src/hadoop-2.9.0/share/hadoop/yarn:/usr/local/src/hadoop-2.9.0/share/hadoop/yarn/lib/*:/usr/local/src/hadoop-2.9.0/share/hadoop/yarn/*:/usr/local/src/hadoop-2.9.0/share/hadoop/mapreduce/lib/*:/usr/local/src/hadoop-2.9.0/share/hadoop/mapreduce/*:/usr/local/src/hadoop-2.9.0/contrib/capacity-scheduler/*.jar:/usr/local/src/hbase-0.98.6-hadoop2/conf:/usr/local/src/jdk1.8.0_202/lib/tools.jar:/usr/local/src/hbase-0.98.6-hadoop2:/usr/local/src/hbase-0.98.6-hadoop2/lib/activation-1.1.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/aopalliance-1.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/asm-3.1.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/avro-1.7.4.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/commons-beanutils-1.7.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/commons-beanutils-core-1.8.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/commons-cli-1.2.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/commons-codec-1.7.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/commons-collections-3.2.1.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/commons-compress-1.4.1.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/commons-configuration-1.6.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/commons-daemon-1.0.13.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/commons-digester-1.8.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/commons-el-1.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/commons-httpclient-3.1.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/commons-io-2.4.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/commons-lang-2.6.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/commons-l
ogging-1.1.1.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/commons-math-2.1.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/commons-net-3.1.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/findbugs-annotations-1.3.9-1.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/gmbal-api-only-3.0.0-b023.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/grizzly-framework-2.1.2.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/grizzly-http-2.1.2.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/grizzly-http-server-2.1.2.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/grizzly-http-servlet-2.1.2.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/grizzly-rcm-2.1.2.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/guava-12.0.1.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/guice-3.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/guice-servlet-3.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hadoop-annotations-2.2.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hadoop-auth-2.2.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hadoop-client-2.2.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hadoop-common-2.2.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hadoop-hdfs-2.2.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hadoop-mapreduce-client-app-2.2.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hadoop-mapreduce-client-common-2.2.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hadoop-mapreduce-client-core-2.2.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hadoop-mapreduce-client-jobclient-2.2.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hadoop-mapreduce-client-shuffle-2.2.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hadoop-yarn-api-2.2.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hadoop-yarn-client-2.2.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hadoop-yarn-common-2.2.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hadoop-yarn-server-common-2.2.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hadoop-yarn-server-nodemanager-2.2.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hamcrest-core-1.3.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hbase-client-0.98.6-hado
op2.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hbase-common-0.98.6-hadoop2.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hbase-common-0.98.6-hadoop2-tests.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hbase-examples-0.98.6-hadoop2.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hbase-hadoop2-compat-0.98.6-hadoop2.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hbase-hadoop-compat-0.98.6-hadoop2.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hbase-it-0.98.6-hadoop2.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hbase-it-0.98.6-hadoop2-tests.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hbase-prefix-tree-0.98.6-hadoop2.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hbase-protocol-0.98.6-hadoop2.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hbase-server-0.98.6-hadoop2.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hbase-server-0.98.6-hadoop2-tests.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hbase-shell-0.98.6-hadoop2.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hbase-testing-util-0.98.6-hadoop2.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/hbase-thrift-0.98.6-hadoop2.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/high-scale-lib-1.1.1.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/htrace-core-2.04.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/httpclient-4.1.3.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/httpcore-4.1.3.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jackson-core-asl-1.8.8.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jackson-jaxrs-1.8.8.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jackson-mapper-asl-1.8.8.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jackson-xc-1.8.8.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jamon-runtime-2.3.1.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jasper-compiler-5.5.23.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jasper-runtime-5.5.23.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/javax.inject-1.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/javax.servlet-3.1.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/javax.servlet-api-3.0.1.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jaxb-api-2.2
.2.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jaxb-impl-2.2.3-1.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jersey-client-1.9.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jersey-core-1.8.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jersey-grizzly2-1.9.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jersey-guice-1.9.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jersey-json-1.8.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jersey-server-1.8.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jersey-test-framework-core-1.9.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jersey-test-framework-grizzly2-1.9.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jets3t-0.6.1.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jettison-1.3.1.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jetty-6.1.26.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jetty-sslengine-6.1.26.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jetty-util-6.1.26.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jruby-complete-1.6.8.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jsch-0.1.42.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jsp-2.1-6.1.14.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jsp-api-2.1-6.1.14.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/jsr305-1.3.9.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/junit-4.11.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/libthrift-0.9.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/log4j-1.2.17.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/management-api-3.0.0-b012.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/metrics-core-2.2.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/netty-3.6.6.Final.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/paranamer-2.3.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/protobuf-java-2.5.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/servlet-api-2.5-6.1.14.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/slf4j-api-1.6.4.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/slf4j-log4j12-1.6.4.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/snappy-java-1.0.4.1.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/xmlenc-0.52.jar:/usr/l
ocal/src/hbase-0.98.6-hadoop2/lib/xz-1.0.jar:/usr/local/src/hbase-0.98.6-hadoop2/lib/zookeeper-3.4.6.jar:/usr/local/src/hadoop-2.9.0/etc/hadoop:/usr/local/src/hadoop-2.9.0/share/hadoop/common/lib/*:/usr/local/src/hadoop-2.9.0/share/hadoop/common/*:/usr/local/src/hadoop-2.9.0/share/hadoop/hdfs:/usr/local/src/hadoop-2.9.0/share/hadoop/hdfs/lib/*:/usr/local/src/hadoop-2.9.0/share/hadoop/hdfs/*:/usr/local/src/hadoop-2.9.0/share/hadoop/yarn:/usr/local/src/hadoop-2.9.0/share/hadoop/yarn/lib/*:/usr/local/src/hadoop-2.9.0/share/hadoop/yarn/*:/usr/local/src/hadoop-2.9.0/share/hadoop/mapreduce/lib/*:/usr/local/src/hadoop-2.9.0/share/hadoop/mapreduce/*:/usr/local/src/hadoop-2.9.0/contrib/capacity-scheduler/*.jar:/usr/local/src/hbase-0.98.6-hadoop2/conf:/usr/local/src/hive1.2.2/lib/*' -Djava.library.path=:/usr/local/src/hadoop-2.9.0/lib/native:/usr/local/src/hadoop-2.9.0/lib/native org.apache.flume.node.Application --name a2 --conf-file /opt/ETL/flume-hdfs.conf
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/local/src/flume1.8.0/lib/slf4j-log4j12-1.6.1.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/src/hadoop-2.9.0/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/src/hbase-0.98.6-hadoop2/lib/slf4j-log4j12-1.6.4.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
2019-05-08 22:19:49,585 (lifecycleSupervisor-1-0) [INFO - org.apache.flume.node.PollingPropertiesFileConfigurationProvider.start(PollingPropertiesFileConfigurationProvider.java:62)] Configuration provider starting
2019-05-08 22:19:49,589 (conf-file-poller-0) [INFO - org.apache.flume.node.PollingPropertiesFileConfigurationProvider$FileWatcherRunnable.run(PollingPropertiesFileConfigurationProvider.java:134)] Reloading configuration file:/opt/ETL/flume-hdfs.conf
2019-05-08 22:19:49,594 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k2
2019-05-08 22:19:49,594 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k2
2019-05-08 22:19:49,594 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k2
2019-05-08 22:19:49,594 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k2
2019-05-08 22:19:49,595 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k2
2019-05-08 22:19:49,595 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:930)] Added sinks: k2 Agent: a2
2019-05-08 22:19:49,595 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k2
2019-05-08 22:19:49,595 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k2
2019-05-08 22:19:49,595 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k2
2019-05-08 22:19:49,595 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k2
2019-05-08 22:19:49,595 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k2
2019-05-08 22:19:49,595 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k2
2019-05-08 22:19:49,596 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k2
2019-05-08 22:19:49,596 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k2
2019-05-08 22:19:49,612 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration.validateConfiguration(FlumeConfiguration.java:140)] Post-validation flume configuration contains configuration for agents: [a2]
2019-05-08 22:19:49,612 (conf-file-poller-0) [INFO - org.apache.flume.node.AbstractConfigurationProvider.loadChannels(AbstractConfigurationProvider.java:147)] Creating channels
2019-05-08 22:19:49,643 (conf-file-poller-0) [INFO - org.apache.flume.channel.DefaultChannelFactory.create(DefaultChannelFactory.java:42)] Creating instance of channel c2 type memory
2019-05-08 22:19:49,661 (conf-file-poller-0) [INFO - org.apache.flume.node.AbstractConfigurationProvider.loadChannels(AbstractConfigurationProvider.java:201)] Created channel c2
2019-05-08 22:19:49,661 (conf-file-poller-0) [INFO - org.apache.flume.source.DefaultSourceFactory.create(DefaultSourceFactory.java:41)] Creating instance of source r2, type exec
2019-05-08 22:19:49,668 (conf-file-poller-0) [INFO - org.apache.flume.sink.DefaultSinkFactory.create(DefaultSinkFactory.java:42)] Creating instance of sink: k2, type: hdfs
2019-05-08 22:19:49,677 (conf-file-poller-0) [INFO - org.apache.flume.node.AbstractConfigurationProvider.getConfiguration(AbstractConfigurationProvider.java:116)] Channel c2 connected to [r2, k2]
2019-05-08 22:19:49,682 (conf-file-poller-0) [INFO - org.apache.flume.node.Application.startAllComponents(Application.java:137)] Starting new configuration:{ sourceRunners:{r2=EventDrivenSourceRunner: { source:org.apache.flume.source.ExecSource{name:r2,state:IDLE} }} sinkRunners:{k2=SinkRunner: { policy:org.apache.flume.sink.DefaultSinkProcessor@6d7f6b4a counterGroup:{ name:null counters:{} } }} channels:{c2=org.apache.flume.channel.MemoryChannel{name: c2}} }
2019-05-08 22:19:49,690 (conf-file-poller-0) [INFO - org.apache.flume.node.Application.startAllComponents(Application.java:144)] Starting Channel c2
2019-05-08 22:19:49,764 (lifecycleSupervisor-1-0) [INFO - org.apache.flume.instrumentation.MonitoredCounterGroup.register(MonitoredCounterGroup.java:119)] Monitored counter group for type: CHANNEL, name: c2: Successfully registered new MBean.
2019-05-08 22:19:49,764 (lifecycleSupervisor-1-0) [INFO - org.apache.flume.instrumentation.MonitoredCounterGroup.start(MonitoredCounterGroup.java:95)] Component type: CHANNEL, name: c2 started
2019-05-08 22:19:49,764 (conf-file-poller-0) [INFO - org.apache.flume.node.Application.startAllComponents(Application.java:171)] Starting Sink k2
2019-05-08 22:19:49,765 (conf-file-poller-0) [INFO - org.apache.flume.node.Application.startAllComponents(Application.java:182)] Starting Source r2
2019-05-08 22:19:49,765 (lifecycleSupervisor-1-2) [INFO - org.apache.flume.source.ExecSource.start(ExecSource.java:168)] Exec source starting with command: cat /opt/logs/access_2018_05_30.log
2019-05-08 22:19:49,766 (lifecycleSupervisor-1-0) [INFO - org.apache.flume.instrumentation.MonitoredCounterGroup.register(MonitoredCounterGroup.java:119)] Monitored counter group for type: SINK, name: k2: Successfully registered new MBean.
2019-05-08 22:19:49,766 (lifecycleSupervisor-1-0) [INFO - org.apache.flume.instrumentation.MonitoredCounterGroup.start(MonitoredCounterGroup.java:95)] Component type: SINK, name: k2 started
2019-05-08 22:19:49,769 (lifecycleSupervisor-1-2) [INFO - org.apache.flume.instrumentation.MonitoredCounterGroup.register(MonitoredCounterGroup.java:119)] Monitored counter group for type: SOURCE, name: r2: Successfully registered new MBean.
2019-05-08 22:19:49,770 (lifecycleSupervisor-1-2) [INFO - org.apache.flume.instrumentation.MonitoredCounterGroup.start(MonitoredCounterGroup.java:95)] Component type: SOURCE, name: r2 started
2019-05-08 22:19:49,861 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.hdfs.HDFSDataStream.configure(HDFSDataStream.java:57)] Serializer = TEXT, UseRawLocalFileSystem = false
2019-05-08 22:19:49,954 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.hdfs.BucketWriter.open(BucketWriter.java:251)] Creating hdfs://master:9000/flume//FlumeData.1557379189862.tmp
2019-05-08 22:21:03,468 (pool-5-thread-1) [INFO - org.apache.flume.source.ExecSource$ExecRunnable.run(ExecSource.java:372)] Command [cat /opt/logs/access_2018_05_30.log] exited with 0
2019-05-08 22:21:50,833 (hdfs-k2-roll-timer-0) [INFO - org.apache.flume.sink.hdfs.BucketWriter.close(BucketWriter.java:393)] Closing hdfs://master:9000/flume//FlumeData.1557379189862.tmp
2019-05-08 22:21:51,873 (hdfs-k2-call-runner-11) [INFO - org.apache.flume.sink.hdfs.BucketWriter$8.call(BucketWriter.java:655)] Renaming hdfs://master:9000/flume/FlumeData.1557379189862.tmp to hdfs://master:9000/flume/FlumeData.1557379189862
2019-05-08 22:21:51,878 (hdfs-k2-roll-timer-0) [INFO - org.apache.flume.sink.hdfs.HDFSEventSink$1.run(HDFSEventSink.java:382)] Writer callback called.
3.4运行MapReduce
上传文件完毕之后,用Ctrl+C停止flume进程,就可以执行MR
19/05/08 23:09:05 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.10.20:8032
19/05/08 23:09:07 WARN mapreduce.JobResourceUploader: Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
19/05/08 23:09:07 INFO input.FileInputFormat: Total input files to process : 1
19/05/08 23:09:08 INFO mapreduce.JobSubmitter: number of splits:1
19/05/08 23:09:09 INFO Configuration.deprecation: yarn.resourcemanager.system-metrics-publisher.enabled is deprecated. Instead, use yarn.system-metrics-publisher.enabled
19/05/08 23:09:10 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1557374842457_0002
19/05/08 23:09:12 INFO impl.YarnClientImpl: Submitted application application_1557374842457_0002
19/05/08 23:09:14 INFO mapreduce.Job: The url to track the job: http://master:8088/proxy/application_1557374842457_0002/
19/05/08 23:09:14 INFO mapreduce.Job: Running job: job_1557374842457_0002
19/05/08 23:11:24 INFO mapreduce.Job: Job job_1557374842457_0002 running in uber mode : false
19/05/08 23:11:24 INFO mapreduce.Job: map 0% reduce 0%
19/05/08 23:15:29 INFO mapreduce.Job: map 45% reduce 0%
19/05/08 23:15:45 INFO mapreduce.Job: map 100% reduce 0%
19/05/08 23:16:17 INFO mapreduce.Job: map 100% reduce 100%
19/05/08 23:16:21 INFO mapreduce.Job: Job job_1557374842457_0002 completed successfully
19/05/08 23:16:23 INFO mapreduce.Job: Counters: 49
4.查看输出结果
4.1查看hdfs
[root@master ETL]# hadoop fs -ls -h /flume
Found 1 items
-rw-r--r-- 3 root supergroup 57.7 M 2019-05-08 23:08 /flume/FlumeData.1557382004754
[root@master ETL]# hadoop fs -ls -h /homework
Found 2 items
-rw-r--r-- 3 root supergroup 0 2019-05-08 23:16 /homework/_SUCCESS
-rw-r--r-- 3 root supergroup 12.3 M 2019-05-08 23:16 /homework/part-r-00000
4.2查看文档命令
可以使用head(查看前几行)、tail(查看末尾几行)两个命令。
例如:
查看/etc/profile的前10行内容,应该是:
# head -n 10 /etc/profile
查看/etc/profile的最后5行内容,应该是:
# tail -n 5 /etc/profile
如果想同时查看可以将前10行和后5行的显示信息通过输出重定向的方法保存到一个文档,这样查看文档即可一目了然。
例如:
将内容输出到/home/test文件中
# head -n 10 /etc/profile >>/home/test
# tail -n 5 /etc/profile>>/home/test
查看的话只需要打开test文件即可。
cat /home/test
【一】从第3000行开始,显示1000行。即显示3000~3999行
cat filename | tail -n +3000 | head -n 1000
【二】显示1000行到3000行
cat filename| head -n 3000 | tail -n +1000
*注意两种方法的顺序
分解:
tail -n 1000:显示最后1000行
tail -n +1000:从1000行开始显示,显示1000行以后的
head -n 1000:显示前面1000行
【三】用sed命令
sed -n '5,10p' filename 这样你就可以只查看文件的第5行到第10行。
4.3查看本地目录 /opt/homework
[root@master homework]# ll -h
total 13M
-rw-r--r-- 1 root root 13M May 8 22:41 part-r-00000
-rw-r--r-- 1 root root 0 May 8 22:40 _SUCCESS
[root@master homework]# pwd
/opt/homework
[root@master homework]# head -n 10 part-r-00000
110.52.250.126 20180530173820 data/cache/style_1_widthauto.css?y7a
110.52.250.126 20180530173820 source/plugin/wsh_wx/img/wsh_zk.css
110.52.250.126 20180530173820 data/cache/style_1_forum_index.css?y7a
110.52.250.126 20180530173820 source/plugin/wsh_wx/img/wx_jqr.gif
27.19.74.143 20180530173820 data/attachment/common/c8/common_2_verify_icon.png
27.19.74.143 20180530173820 data/cache/common_smilies_var.js?y7a
8.35.201.165 20180530173822 data/attachment/common/c5/common_13_usergroup_icon.jpg
220.181.89.156 20180530173820 thread-24727-1-1.html
211.97.15.179 20180530173822 data/cache/style_1_forum_index.css?y7a
211.97.15.179 20180530173822 data/cache/style_1_widthauto.css?y7a
![](D:\A-Programming\BAT\实验结果图片\flume (4).PNG)
![](D:\A-Programming\BAT\实验结果图片\flume (5).PNG)