问题描述:统计某个号码被哪些号码呼叫了
输入文件如下:
13588888888 112
13678987879 13509098987
18987655436 110
2543789 112
15699807656 110
011-678987 112
说明:每一行为一条电话通话记录,左边的号码(记为a)打给右边的号码(记为b号码),中间用空格隔开
要求:
将以上文件以如下格式输出:
110 18987655436|15699807656
112 13588888888|011-678987
13509098987 13678987879
说明:左边为被呼叫的号码b,右边为呼叫b的号码a以"|"分割
解决思路很简单:Map 中将每行按空格分割出 a、b 两个号码,以 b 为 key、a 为 value 写入 context
Reduce 中将同一 key 下的 values 以"|"为分隔符迭代拼接后输出
初始化job 时要注意输入输出类型:
job.setInputFormatClass(TextInputFormat.class); //指定数据到达 map 前的组织结构形式,hadoop 会根据你设置的格式给 map 函数传递相应的 key、value 实参。
如 TextInputFormat 是默认的输入格式,它传给 map 的 key 是该行第一个字节在整个输入文件中的偏移量,value 是该行的文本内容。
job.setOutputFormatClass(TextOutputFormat.class);//默认格式,将key value转出字符串输出
package hadooptest3;
import java.io.File;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import hadooptest3.EJob;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * MapReduce job that inverts a call log: for each callee number b it lists
 * every caller a that dialed it, joined with "|".
 *
 * Input lines:  "&lt;caller&gt; &lt;callee&gt;"  (space separated)
 * Output lines: "&lt;callee&gt;\t&lt;caller1&gt;|&lt;caller2&gt;|..."
 */
public class hadooptes3 extends Configured implements Tool
{
    // Job-level counters reported after completion.
    enum Counter
    {
        LINESKIP, // lines that could not be parsed into two numbers
    }

    /**
     * Configures and runs the job.
     *
     * @param args args[0] = input path, args[1] = output path
     * @return 0 on submission success, -1 on bad usage
     */
    @Override
    public int run(String[] args) throws Exception
    {
        if (args.length != 2)
        {
            // Usage message now shows the two required arguments.
            System.err.printf("Usage: %s <input> <output>\n", getClass().getSimpleName());
            ToolRunner.printGenericCommandUsage(System.err);
            return -1;
        }
        // Package the compiled classes ("bin") into a temp jar so the remote
        // job tracker can load our mapper/reducer classes.
        File jarFile = EJob.createTempJar("bin");
        ClassLoader classLoader = EJob.getClassLoader();
        Thread.currentThread().setContextClassLoader(classLoader);

        Configuration conf = getConf();
        conf.set("mapred.job.tracker", "Master.Hadoop:54311");
        Job job = new Job(conf, "PhoneNumber");
        ((JobConf) job.getConfiguration()).setJar(jarFile.toString());

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setJarByClass(hadooptes3.class);
        job.setMapperClass(mapper.class);
        job.setReducerClass(reducer.class);
        // Map and reduce both emit (Text, Text), so one declaration suffices.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.waitForCompletion(true);

        System.out.println("任务名称:" + job.getJobName());
        System.out.println("任务成功:" + (job.isSuccessful() ? "是" : "否"));
        System.out.println("输入行数:" + job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue());
        System.out.println("输出行数:" + job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_OUTPUT_RECORDS").getValue());
        System.out.println("跳过行数:" + job.getCounters().findCounter(Counter.LINESKIP).getValue());
        return 0;
    }

    public static void main(String[] args) throws Exception
    {
        int exitCode = ToolRunner.run(new hadooptes3(), args);
        System.exit(exitCode);
    }

    /**
     * Emits (callee, caller) for each input line so the shuffle groups all
     * callers of the same number together.
     *
     * NOTE: the original declared {@code extends Mapper} raw — with raw types
     * this map signature does not override the framework's generic map(), so
     * the identity mapper would run instead. The type parameters are required.
     */
    public static class mapper extends Mapper<LongWritable, Text, Text, Text>
    {
        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
        {
            String line = value.toString();
            try
            {
                // trim + \s+ tolerates stray/multiple spaces; a line without
                // two tokens throws and is counted as skipped below.
                String[] numbers = line.trim().split("\\s+");
                String caller = numbers[0];
                String callee = numbers[1];
                context.write(new Text(callee), new Text(caller));
            }
            catch (Exception e)
            {
                // Malformed line: count it and move on (best-effort parsing).
                context.getCounter(Counter.LINESKIP).increment(1);
                return;
            }
        }
    }

    /**
     * Joins all callers of one callee with "|". Uses a StringBuilder and emits
     * no trailing separator, matching the required output format
     * (e.g. "110 18987655436|15699807656" — the original appended a dangling "|").
     */
    public static class reducer extends Reducer<Text, Text, Text, Text>
    {
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException
        {
            StringBuilder joined = new StringBuilder();
            for (Text val : values)
            {
                if (joined.length() > 0)
                {
                    joined.append('|');
                }
                joined.append(val.toString());
            }
            context.write(key, new Text(joined.toString()));
        }
    }
}
package hadooptest3;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.jar.JarEntry;
import java.util.jar.JarOutputStream;
import java.util.jar.Manifest;
public class EJob {
// To declare global field
private static List
classPath = new ArrayList
(); // To declare method public static File createTempJar(String root) throws IOException { if (!new File(root).exists()) { return null; } Manifest manifest = new Manifest(); manifest.getMainAttributes().putValue("Manifest-Version", "1.0"); final File jarFile = File.createTempFile("EJob-", ".jar", new File(System.getProperty("java.io.tmpdir"))); Runtime.getRuntime().addShutdownHook(new Thread() { public void run() { jarFile.delete(); } }); JarOutputStream out = new JarOutputStream(new FileOutputStream(jarFile), manifest); createTempJarInner(out, new File(root), ""); out.flush(); out.close(); return jarFile; } private static void createTempJarInner(JarOutputStream out, File f, String base) throws IOException { if (f.isDirectory()) { File[] fl = f.listFiles(); if (base.length() > 0) { base = base + "/"; } for (int i = 0; i < fl.length; i++) { createTempJarInner(out, fl[i], base + fl[i].getName()); } } else { out.putNextEntry(new JarEntry(base)); FileInputStream in = new FileInputStream(f); byte[] buffer = new byte[1024]; int n = in.read(buffer); while (n != -1) { out.write(buffer, 0, n); n = in.read(buffer); } in.close(); } } public static ClassLoader getClassLoader() { ClassLoader parent = Thread.currentThread().getContextClassLoader(); if (parent == null) { parent = EJob.class.getClassLoader(); } if (parent == null) { parent = ClassLoader.getSystemClassLoader(); } return new URLClassLoader(classPath.toArray(new URL[0]), parent); } public static void addClasspath(String component) { if ((component != null) && (component.length() > 0)) { try { File f = new File(component); if (f.exists()) { URL key = f.getCanonicalFile().toURL(); if (!classPath.contains(key)) { classPath.add(key); } } } catch (IOException e) { } } } }