// MapReduce example: joining two differently-formatted inputs with MultipleInputs
package mr.mulit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class MultipleJob extends Configuration implements Tool {

    /** Configuration injected by ToolRunner; lazily created if never set. */
    private Configuration conf;

    /**
     * Returns the job configuration.
     *
     * BUG FIX: the original returned a brand-new {@code Configuration} on
     * every call, discarding anything ToolRunner had passed to setConf().
     */
    @Override
    public Configuration getConf() {
        if (conf == null) {
            conf = new Configuration();
        }
        return conf;
    }

    /**
     * Stores the configuration for later use by {@link #run}.
     * BUG FIX: the original silently dropped the argument.
     */
    @Override
    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    /**
     * Configures and submits the join job.
     *
     * @param args args[0] = first input path, args[1] = second input path,
     *             args[2] = output path (deleted first if it exists)
     * @return 0 on success, 1 on failure or bad usage
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args.length < 3) {
            System.err.println("Usage: MultipleJob <input1> <input2> <output>");
            return 1;
        }
        Configuration conf = getConf();
        // Job.getInstance replaces the deprecated Job(Configuration, String) ctor.
        Job job = Job.getInstance(conf, "MultipleJob");
        job.setJarByClass(MultipleJob.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // BUG FIX: the reducer was never registered, so the framework ran the
        // identity reducer and MultipleReducer was dead code.
        job.setReducerClass(MultipleReducer.class);
        // Each input directory gets its own InputFormat + Mapper pair.
        MultipleInputs.addInputPath(job, new Path(args[0]), FirstInputFormat.class, FirstMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), SecondInputFormat.class, SecondMapper.class);
        Path outputPath = new Path(args[2]);
        FileSystem fs = FileSystem.get(conf);
        // Remove a stale output directory so resubmission does not fail.
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // BUG FIX: propagate the job's exit status to the shell.
        System.exit(ToolRunner.run(new MultipleJob(), args));
    }
}
package mr.mulit;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
/**
 * Writable wrapper around a single string payload, produced by
 * {@code FirstRecordReader} for records of the first input format.
 */
public class FirstClass implements Writable {

    /** The record's payload; serialized as a UTF string. */
    private String value;

    /** Required no-arg constructor for Hadoop deserialization. */
    public FirstClass() {
    }

    public FirstClass(String value) {
        this.value = value;
    }

    public String getValue() {
        return value;
    }

    public void setValue(String value) {
        this.value = value;
    }

    @Override
    public String toString() {
        return "FirstClass [value=" + value + "]";
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(value);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        value = in.readUTF();
    }
}
package mr.mulit;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
/**
 * InputFormat for the first data source: each record is keyed by Text and
 * carries a {@link FirstClass} value.
 */
public class FirstInputFormat extends FileInputFormat<Text, FirstClass> {

    /**
     * Hands back a fresh reader; the framework performs the actual
     * initialization via {@code RecordReader#initialize}.
     */
    @Override
    public RecordReader<Text, FirstClass> createRecordReader(
            InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        return new FirstRecordReader();
    }
}
package mr.mulit;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper for the first input source: forwards each record unchanged,
 * rendering the {@link FirstClass} value as text.
 */
public class FirstMapper extends Mapper<Text, FirstClass, Text, Text> {

    @Override
    protected void map(Text key, FirstClass value, Context context)
            throws IOException, InterruptedException {
        // Emit the value's string form so both sources share one value type.
        context.write(key, new Text(value.toString()));
    }
}
package mr.mulit;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
/**
 * Reads lines of the form {@code key<TAB>value} and exposes them as
 * (Text, FirstClass) pairs, delegating line reading to LineRecordReader.
 */
public class FirstRecordReader extends RecordReader<Text, FirstClass> {

    /** Delegate that does the byte-level line reading; null when closed. */
    private LineRecordReader lineRecordReader = null;
    private Text key = null;
    private FirstClass value = null;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // Release any reader left over from a previous split before reuse.
        close();
        lineRecordReader = new LineRecordReader();
        lineRecordReader.initialize(split, context);
    }

    /**
     * Advances to the next line and parses it as {@code key<TAB>value}.
     *
     * BUG FIX: the original split on the literal letter "t" instead of the
     * tab character "\t", so any line containing a 't' was parsed wrongly.
     *
     * @return true if a record was read, false at end of split
     * @throws IOException if a line does not contain a tab separator
     */
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        if (!lineRecordReader.nextKeyValue()) {
            key = null;
            value = null;
            return false;
        }
        String line = lineRecordReader.getCurrentValue().toString();
        // Limit 2: a value that itself contains tabs is kept intact.
        String[] fields = line.split("\t", 2);
        if (fields.length < 2) {
            throw new IOException("Malformed record, expected key<TAB>value: " + line);
        }
        key = new Text(fields[0]);
        value = new FirstClass(fields[1]);
        return true;
    }

    @Override
    public Text getCurrentKey() throws IOException, InterruptedException {
        return key;
    }

    @Override
    public FirstClass getCurrentValue() throws IOException, InterruptedException {
        return value;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        // Guard against a call after close()/before initialize().
        return lineRecordReader == null ? 0.0f : lineRecordReader.getProgress();
    }

    @Override
    public void close() throws IOException {
        if (lineRecordReader != null) {
            lineRecordReader.close();
            lineRecordReader = null;
        }
        key = null;
        value = null;
    }
}
package mr.mulit;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Identity-style reducer: writes every value straight through under its
 * original key, interleaving the records from both input sources.
 */
public class MultipleReducer extends Reducer<Text, Text, Text, Text> {

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        for (Text value : values) {
            context.write(key, value);
        }
    }
}
package mr.mulit;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
/**
 * Writable carrying a user name and a class count, produced by
 * {@code SecondRecordReader} for records of the second input format.
 */
public class SecondClass implements Writable {

    /** Serialized first, as a UTF string. */
    private String userName;
    /** Serialized second, as a 4-byte int. */
    private int classNum;

    /** Required no-arg constructor for Hadoop deserialization. */
    public SecondClass() {
    }

    public SecondClass(String userName, int classNum) {
        this.userName = userName;
        this.classNum = classNum;
    }

    public String getUserName() {
        return userName;
    }

    public void setUserName(String userName) {
        this.userName = userName;
    }

    public int getClassNum() {
        return classNum;
    }

    public void setClassNum(int classNum) {
        this.classNum = classNum;
    }

    @Override
    public String toString() {
        return "SecondClass [userName=" + userName + ", classNum=" + classNum + "]";
    }

    @Override
    public void write(DataOutput out) throws IOException {
        // Field order must match readFields exactly.
        out.writeUTF(userName);
        out.writeInt(classNum);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        userName = in.readUTF();
        classNum = in.readInt();
    }
}
package mr.mulit;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
/**
 * InputFormat for the second data source: each record is keyed by Text and
 * carries a {@link SecondClass} value.
 */
public class SecondInputFormat extends FileInputFormat<Text, SecondClass> {

    /**
     * Hands back a fresh reader; the framework performs the actual
     * initialization via {@code RecordReader#initialize}.
     */
    @Override
    public RecordReader<Text, SecondClass> createRecordReader(
            InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        return new SecondRecordReader();
    }
}
package mr.mulit;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper for the second input source: forwards each record unchanged,
 * rendering the {@link SecondClass} value as text.
 */
public class SecondMapper extends Mapper<Text, SecondClass, Text, Text> {

    @Override
    protected void map(Text key, SecondClass value, Context context)
            throws IOException, InterruptedException {
        // Emit the value's string form so both sources share one value type.
        context.write(key, new Text(value.toString()));
    }
}
package mr.mulit;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
/**
 * Reads lines of the form {@code key<TAB>userName<TAB>classNum} and exposes
 * them as (Text, SecondClass) pairs, delegating to LineRecordReader.
 */
public class SecondRecordReader extends RecordReader<Text, SecondClass> {

    /** Delegate that does the byte-level line reading; null when closed. */
    private LineRecordReader lineRecordReader = null;
    private Text key = null;
    private SecondClass value = null;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // Release any reader left over from a previous split before reuse.
        close();
        lineRecordReader = new LineRecordReader();
        lineRecordReader.initialize(split, context);
    }

    /**
     * Advances to the next line and parses it as
     * {@code key<TAB>userName<TAB>classNum}.
     *
     * BUG FIX: the original split on the literal letter "t" instead of the
     * tab character "\t", so any line containing a 't' was parsed wrongly.
     *
     * @return true if a record was read, false at end of split
     * @throws IOException if a line has fewer than three tab-separated
     *                     fields or the third field is not an integer
     */
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        if (!lineRecordReader.nextKeyValue()) {
            key = null;
            value = null;
            return false;
        }
        String line = lineRecordReader.getCurrentValue().toString();
        String[] fields = line.split("\t");
        if (fields.length < 3) {
            throw new IOException(
                    "Malformed record, expected key<TAB>userName<TAB>classNum: " + line);
        }
        key = new Text(fields[0]);
        try {
            value = new SecondClass(fields[1], Integer.parseInt(fields[2]));
        } catch (NumberFormatException e) {
            // Surface the offending line instead of an opaque parse failure.
            throw new IOException("Invalid classNum in record: " + line, e);
        }
        return true;
    }

    @Override
    public Text getCurrentKey() throws IOException, InterruptedException {
        return key;
    }

    @Override
    public SecondClass getCurrentValue() throws IOException, InterruptedException {
        return value;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        // Guard against a call after close()/before initialize().
        return lineRecordReader == null ? 0.0f : lineRecordReader.getProgress();
    }

    @Override
    public void close() throws IOException {
        if (lineRecordReader != null) {
            lineRecordReader.close();
            lineRecordReader = null;
        }
        key = null;
        value = null;
    }
}