import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
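/**
 * ThreadUser chains two MapReduce jobs over a forum post log.
 * Job 1 groups post records by user; for every user who posted in thread
 * 14280 it emits a (threadid, userid) pair for each thread that user posted in.
 * Job 2 then counts those pairs per thread, yielding how many posts users of
 * thread 14280 made in every thread.
 * The input field layout (space-separated, user id in the second field and
 * thread id in the third) is inferred from the mapper indices below; the
 * thread id 14280 is hard-coded.
 */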
public class ThreadUser {

    // Custom counter for malformed input lines that the mappers skip.
    enum Counter { LINESKIP }
    /**
     * Job 1 mapper: parses one space-separated log line and emits
     * (userid, threadid). Lines that cannot be parsed are counted and skipped.
     */
    public static class TUMapper extends Mapper<LongWritable, Text, Text, Text> {
        private Text userid = new Text();
        private Text threadid = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] arr = value.toString().split(" ");
            try {
                userid.set(arr[1]);
                threadid.set(arr[2]);
                context.write(userid, threadid);
            } catch (Exception e) {
                e.printStackTrace();
                context.getCounter(Counter.LINESKIP).increment(1);
            }
        }
    }
    /**
     * Job 1 reducer: receives one user id and all thread ids that user posted
     * in. If thread 14280 is among them, every (threadid, userid) pair is
     * emitted so that job 2 can tally them per thread.
     */
    public static class TUReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            StringBuilder sb = new StringBuilder();
            for (Text val : values) {
                sb.append(val).append(",");
            }
            String[] strarr = sb.toString().split(",");
            // Membership is tested with an exact comparison; a substring search
            // like indexOf("14280") would also match ids such as "114280".
            boolean posted = false;
            for (String tid : strarr) {
                if ("14280".equals(tid)) {
                    posted = true;
                    break;
                }
            }
            if (posted) {
                for (String tid : strarr) {
                    context.write(new Text(tid), key);
                }
            }
        }
    }
    /**
     * Job 2 mapper: reads job 1's tab-separated output ("threadid<TAB>userid")
     * and re-emits (threadid, userid) so posts can be counted per thread.
     */
    public static class TUMapper2 extends Mapper<LongWritable, Text, Text, Text> {
        private Text userid = new Text();
        private Text threadid = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] arr = value.toString().split("\t");
            try {
                threadid.set(arr[0]);
                userid.set(arr[1]);
                context.write(threadid, userid);
            } catch (Exception e) {
                e.printStackTrace();
                context.getCounter(Counter.LINESKIP).increment(1);
            }
        }
    }
    /**
     * Job 2 reducer: counts the values per thread id, i.e. how many posts
     * users of thread 14280 made in that thread.
     */
    public static class TUReducer2 extends Reducer<Text, Text, Text, IntWritable> {
        private IntWritable sum = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            int num = 0;
            for (Text val : values) {
                num++;
            }
            sum.set(num);
            context.write(key, sum);
        }
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("mapred.job.tracker", "192.168.1.23:9001");
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: ThreadUser <in> <out>");
            System.exit(2);
        }

        // Job 1: collect, for every user of thread 14280, all threads that user posted in.
        Job job = new Job(conf, "Thread user");
        job.setJarByClass(ThreadUser.class);
        job.setMapperClass(TUMapper.class);
        job.setReducerClass(TUReducer.class);
        FileSystem fs = FileSystem.get(conf);
        // Intermediate directory shared by the two jobs; cleared if it already exists.
        Path out = new Path("hdfs://192.168.1.23:9000/user/hadoop/temp");
        if (fs.exists(out))
            fs.delete(out, true);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, out);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        boolean res = job.waitForCompletion(true);
        if (!res)
            System.exit(1);    // job 2 consumes job 1's output, so stop here on failure

        // Job 2: count the posts per thread among the pairs emitted by job 1.
        Job job2 = new Job(conf, "Thread user2");
        job2.setJarByClass(ThreadUser.class);
        job2.setMapperClass(TUMapper2.class);
        job2.setReducerClass(TUReducer2.class);
        Path out2 = new Path(otherArgs[1]);
        if (fs.exists(out2))
            fs.delete(out2, true);
        FileInputFormat.addInputPath(job2, out);
        FileOutputFormat.setOutputPath(job2, out2);
        // Map output (Text, Text) differs from reduce output (Text, IntWritable),
        // so the map output types must be declared separately.
        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(Text.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(IntWritable.class);
        boolean res2 = job2.waitForCompletion(true);

        // Print out job finishing status.
        System.out.println("Job2 Name: " + job2.getJobName());
        System.out.println("Job2 Successful: " + (job2.isSuccessful() ? "Yes" : "No"));
        System.out.println("Lines2 of Mapper Input: " + job2.getCounters().findCounter(
                "org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue());
        System.out.println("Lines2 of Reducer Output: " + job2.getCounters().findCounter(
                "org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue());
        System.out.println("Lines2 skipped: " + job2.getCounters().findCounter(Counter.LINESKIP).getValue());
        System.exit(res2 ? 0 : 1);
    }
}
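The mapper can be exercised off-cluster before submitting to the jobtracker. Below is a minimal test sketch using the (since retired) Apache MRUnit 1.x harness with JUnit 4; both are assumed to be on the test classpath, and the class name ThreadUserTest and the sample log line are hypothetical:

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.junit.Test;

public class ThreadUserTest {
    @Test
    public void mapperEmitsUserThenThread() throws Exception {
        MapDriver<LongWritable, Text, Text, Text> driver =
                MapDriver.newMapDriver(new ThreadUser.TUMapper());
        // Hypothetical record: first field ignored, then userid, then threadid.
        driver.withInput(new LongWritable(0), new Text("20120101 u42 14280"))
              .withOutput(new Text("u42"), new Text("14280"))
              .runTest();
    }
}

Once packaged (jar name assumed), the two-job chain runs with:

hadoop jar threaduser.jar ThreadUser /user/hadoop/input /user/hadoop/output

The intermediate /user/hadoop/temp directory is created by job 1 and cleared on each run.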