map过程中获得正在读取的文件名称
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
//获取 input split 所在的文件名
String curFileName = ((FileSplit)context.getInputSplit()).getPath().getName();
//获得文件路径
String curFilePath = ((FileSplit)context.getInputSplit()).getPath().toString();
个人有个小疑问,这个调用是放在map方法中还是放在setup方法中?
如果放在setup方法中就可以避免每次map方法都重新获得一次文件名。但需要注意:使用CombineFileInputFormat时,getInputSplit()返回的是CombineFileSplit而不是FileSplit,直接强制转换为FileSplit会抛出ClassCastException;而且一个CombineFileSplit可能合并了多个文件,在setup中只取一次文件名无法代表后续所有记录所属的文件,这种场景下应在map方法中根据CombineFileSplit的信息获取当前文件名。
map/reduce过程中设置计数器
context.getCounter("GroupName", "FieldName").increment(1);
reduce多目录/自定义文件名输出
package com.demo;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
/**
* @author chichuduxing
* @date 2017年3月31日 下午1:58:33
*/
public class MyReduce extends Reducer<Text, Text, Text, Text> {

	/** Writer for the extra named outputs (besides the default part-r-* files). */
	private MultipleOutputs<Text, Text> _mos;

	/** Number of values seen for the current key; reset at the start of each reduce() call. */
	private long _count = 0L;

	@Override
	protected void setup(Context context) throws IOException, InterruptedException {
		_mos = new MultipleOutputs<Text, Text>(context);
	}

	@Override
	protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
		// The named outputs must be registered when the job is configured:
		// MultipleOutputs.addNamedOutput(job,"result",TextOutputFormat.class,Text.class,Text.class);
		// MultipleOutputs.addNamedOutput(job,"count",TextOutputFormat.class,Text.class,LongWritable.class);
		_count = 0;
		for (Text value : values) {
			_count++;
			// Default output; file name: part-r-00000
			context.write(key, value);
			// Named output; file name sample: result-r-00000
			// BUG FIX: write the current value, not the whole Iterable
			// (the original passed `values`, which would emit the iterator's toString()).
			_mos.write("result", key, value);
			// baseOutputPath variant: records are grouped into one sub-directory per key,
			// producing files like <key>/-r-00000
			_mos.write("result", key, value, key + "/");
		}
		// File name sample: count-r-00000 (reduce side, so the suffix is -r-, not -m-)
		_mos.write("count", key, new LongWritable(_count));
	}

	@Override
	protected void cleanup(Context context) throws IOException, InterruptedException {
		// Must be closed, otherwise buffered records of the named outputs may be lost.
		_mos.close();
	}
}
多个MapReduce设置依赖关系
package com.demo;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
/**
* @author chichuduxing
* @date 2017年3月31日 下午1:39:18
*/
public class MyMain {

	/** Polling interval while waiting for the controlled jobs to finish. */
	private static final long POLL_INTERVAL_MS = 500;

	/**
	 * Demonstrates chaining two MapReduce jobs with JobControl so that JOB2
	 * only runs after JOB1 has succeeded.
	 */
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		Job job1 = Job.getInstance(conf, "JOB1");
		Job job2 = Job.getInstance(conf, "JOB2");
		// Concrete job configuration omitted.

		ControlledJob controlledJob1 = new ControlledJob(job1.getConfiguration());
		controlledJob1.setJob(job1);
		ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());
		controlledJob2.setJob(job2);

		// Declare the dependency: job2 starts only after job1 completes successfully.
		controlledJob2.addDependingJob(controlledJob1);

		// Master controller; JobControl implements Runnable, so run it on its own thread.
		JobControl jc = new JobControl("JobControl");
		jc.addJob(controlledJob1);
		jc.addJob(controlledJob2);

		Thread jcThread = new Thread(jc);
		jcThread.setDaemon(true); // do not keep the JVM alive after main() returns
		jcThread.start();

		while (true) {
			if (jc.allFinished()) {
				System.out.println(jc.getSuccessfulJobList());
				jc.stop();
				return;
			}
			if (jc.getFailedJobList().size() > 0) {
				System.out.println(jc.getFailedJobList());
				jc.stop();
				return;
			}
			try {
				// BUG FIX: sleep between polls instead of busy-spinning a CPU core.
				Thread.sleep(POLL_INTERVAL_MS);
			} catch (InterruptedException e) {
				// Re-assert the interrupt flag and shut down cleanly.
				Thread.currentThread().interrupt();
				jc.stop();
				return;
			}
		}
	}
}
MapReduce自定义类型做key/value
如果是做value的话,只要实现Writable接口就行了,但是如果做key的话,则需要实现WritableComparable接口
如果要为自定义的Writable类添加带参数的构造函数,一定要同时保留默认的无参构造函数——Hadoop通过反射实例化该类时需要它
如果使用TextOutputFormat序列化自定义Writable类型的实例。要确保用于自定义的Writable数据类型有一个有意义的toString()实现
在读取输入数据时,Hadoop会重复使用Writable类的同一个实例。因此在readFields()方法里面填充字段时,不应该依赖于该对象的现有状态
package com.bean;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
/**
* @author chichuduxing
* @date 2017年3月31日 下午3:20:39
*/
/**
 * Custom composite type usable as a MapReduce key (implements WritableComparable)
 * or value. Fields are serialized in the order name, counter, timestamp.
 */
public class MyValue implements WritableComparable<MyValue> {
	public String name;
	public int counter;
	public long timestamp;

	/** No-arg constructor required: Hadoop instantiates the type via reflection. */
	public MyValue() {
	}

	public MyValue(String name, int counter, long timestamp) {
		this.name = name;
		this.counter = counter;
		this.timestamp = timestamp;
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		// Field order must exactly mirror write().
		name = in.readUTF();
		counter = in.readInt();
		timestamp = in.readLong();
	}

	@Override
	public void write(DataOutput out) throws IOException {
		out.writeUTF(name);
		out.writeInt(counter);
		out.writeLong(timestamp);
	}

	/**
	 * Orders by name (null names first), then counter, then timestamp.
	 */
	@Override
	public int compareTo(MyValue other) {
		if (this == other) {
			return 0;
		}
		int byName;
		if (this.name == null) {
			byName = (other.name == null) ? 0 : -1;
		} else if (other.name == null) {
			byName = 1;
		} else {
			byName = this.name.compareTo(other.name);
		}
		if (byName != 0) {
			return byName;
		}
		// Integer/Long.compare avoid the overflow pitfall of subtraction-based compares.
		int byCounter = Integer.compare(this.counter, other.counter);
		if (byCounter != 0) {
			return byCounter;
		}
		return Long.compare(this.timestamp, other.timestamp);
	}

	// FIX: equals/hashCode added, consistent with compareTo. They are required when
	// the type is used as a key: the default HashPartitioner routes records by hashCode().
	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		if (!(obj instanceof MyValue)) {
			return false;
		}
		MyValue other = (MyValue) obj;
		return counter == other.counter
				&& timestamp == other.timestamp
				&& (name == null ? other.name == null : name.equals(other.name));
	}

	@Override
	public int hashCode() {
		int result = (name == null) ? 0 : name.hashCode();
		result = 31 * result + counter;
		result = 31 * result + (int) (timestamp ^ (timestamp >>> 32));
		return result;
	}

	/**
	 * Tab-separated representation; TextOutputFormat serializes keys/values via
	 * toString(), so this must be meaningful. Keeps the original trailing tab.
	 */
	@Override
	public String toString() {
		return new StringBuilder().append(name).append('\t').append(counter).append('\t').append(timestamp).append('\t')
				.toString();
	}
}