代码如下:
SQL> select * from emp;
EMPNO ENAME JOB MGR HIREDATE SAL COMM DEPTNO
---------- ---------- --------- ---------- -------------- ---------- ---------- ----------
7369 SMITH CLERK 7902 17-12月-80 800 20
7499 ALLEN SALESMAN 7698 20-2月 -81 1600 300 30
7521 WARD SALESMAN 7698 22-2月 -81 1250 500 30
7566 JONES MANAGER 7839 02-4月 -81 2975 20
7654 MARTIN SALESMAN 7698 28-9月 -81 1250 1400 30
7698 BLAKE MANAGER 7839 01-5月 -81 2850 30
7782 CLARK MANAGER 7839 09-6月 -81 2450 10
7839 KING PRESIDENT 17-11月-81 5000 10
7844 TURNER SALESMAN 7698 08-9月 -81 1500 0 30
7900 JAMES CLERK 7698 03-12月-81 950 30
7902 FORD ANALYST 7566 03-12月-81 3000 20
7934 MILLER CLERK 7782 23-1月 -82 1300 10
已选择12行。
SQL> select * from dept;
DEPTNO DNAME LOC
---------- -------------- -------------
10 ACCOUNTING NEW YORK
20 RESEARCH DALLAS
30 SALES CHICAGO
40 OPERATIONS BOSTON
发现写MapReduce程序时单元测试很重要,不然调试起来会很麻烦。这里先贴出MapReduce程序本身,后面再贴MRUnit单元测试的代码。
package homework;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Joins fixed-width SQL*Plus dumps of the EMP and DEPT tables on DEPTNO and
 * emits, per department: total salary, average salary, and employee count.
 *
 * <p>Map output: key = DEPTNO; value = "1" + salary (EMP row) or "2" + dname (DEPT row).
 * Reduce output: key = department name; value = "sum avg count".
 */
public class Exercise_4 extends Configured implements Tool {

    /** Job counters. */
    enum Counter {
        LINESKIP; // lines too short / not matching the fixed-width layout
    }

    /**
     * Parses one line of the fixed-width dump.
     *
     * <p>NOTE(review): the column offsets (59-69 for SAL, 78-88 for DEPTNO,
     * 11-25 for DNAME) assume the exact SQL*Plus column layout of the sample
     * dump — confirm against the real input files.
     */
    public static class Map extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString(); // one raw line of the dump
            try {
                int idLen = line.substring(0, 10).trim().length();
                if (idLen == 4) {
                    // EMP row: first column is a 4-digit EMPNO
                    String salary = line.substring(59, 69).trim();
                    String deptno = line.substring(78, 88).trim();
                    context.write(new Text(deptno), new Text("1" + salary));
                } else if (idLen == 2) {
                    // DEPT row: first column is a 2-digit DEPTNO
                    String deptno = line.substring(0, 10).trim();
                    String dname = line.substring(11, 25).trim();
                    context.write(new Text(deptno), new Text("2" + dname));
                }
                // Header/separator lines fall through and are ignored.
            } catch (IndexOutOfBoundsException e) {
                // BUG FIX: String.substring throws StringIndexOutOfBoundsException,
                // not ArrayIndexOutOfBoundsException — the old catch never matched,
                // so short lines (e.g. "已选择12行。") crashed the task instead of
                // being counted as skipped. Catch the common parent to cover both.
                context.getCounter(Counter.LINESKIP).increment(1);
            }
        }
    }

    /**
     * Aggregates one department: sums the "1<salary>" records, and replaces the
     * numeric DEPTNO key with the name carried by the "2<dname>" record.
     */
    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            int sumSalary = 0; // primitives avoid the needless boxing of the old Integer/Long code
            int perCount = 0;
            Text outKey = key; // swapped for the department name if a "2..." record arrives
            for (Text value : values) {
                String v = value.toString();
                char flag = v.charAt(0);
                if (flag == '1') {
                    // salary record from EMP
                    perCount++;
                    sumSalary += Integer.parseInt(v.substring(1));
                } else if (flag == '2') {
                    // department-name record from DEPT
                    outKey = new Text(v.substring(1));
                }
            }
            // Guard against division by zero for a department with no employees.
            int avgSalary = (perCount == 0) ? 0 : sumSalary / perCount;
            context.write(outKey, new Text(sumSalary + " " + avgSalary + " " + perCount));
        }
    }

    /**
     * Configures and submits the job.
     *
     * @param args args[0] = input path, args[1] = output path
     * @return 0 on success, 1 on failure
     */
    @Override
    public int run(String[] args) throws Exception {
        // BUG FIX: the old `new Job()` ignored the Configuration built by
        // ToolRunner, so -D options never reached the job. Pass getConf()
        // through. (On Hadoop 2.x prefer Job.getInstance(getConf()).)
        Job job = new Job(getConf());
        job.setJarByClass(Exercise_4.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));      // input path
        FileOutputFormat.setOutputPath(job, new Path(args[1]));    // output path
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.waitForCompletion(true);
        return job.isSuccessful() ? 0 : 1;
    }

    /**
     * Entry point: validates the argument count, prints usage on error, then
     * delegates to {@link #run} via ToolRunner.
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("");
            System.err.println("Usage: Exercise_4 < input path > < output path > ");
            System.err.println("Example: hadoop jar ~/Exercise_4.jar hdfs://localhost:9000/home/james/Exercise_4 hdfs://localhost:9000/home/james/output");
            System.err.println("Counter:");
            System.err.println("\t" + "LINESKIP" + "\t" + "Lines which are too short");
            System.exit(-1);
        }
        int res = ToolRunner.run(new Configuration(), new Exercise_4(), args);
        System.exit(res);
    }
}
运行结果截图见下。以下是MRUnit单元测试的代码:
package homework;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Before;
import org.junit.Test;
/**
 * MRUnit tests for {@link Exercise_4}: one mapper test on a fixed-width EMP
 * line, one reducer test aggregating a department.
 */
public class Exercise_4Test {
    MapDriver<LongWritable, Text, Text, Text> mapDriver;
    ReduceDriver<Text, Text, Text, Text> reduceDriver;

    /** Builds fresh drivers before each test so state never leaks between them. */
    @Before
    public void setUp() {
        Exercise_4.Map mapper = new Exercise_4.Map();
        Exercise_4.Reduce reducer = new Exercise_4.Reduce();
        mapDriver = MapDriver.newMapDriver(mapper); // FIX: stray double semicolon removed
        reduceDriver = ReduceDriver.newReduceDriver(reducer);
    }

    /**
     * An EMP line should map to (deptno, "1" + salary).
     * NOTE(review): the fixture relies on exact column positions matching the
     * mapper's substring offsets — verify the spacing survived copy/paste.
     */
    @Test
    public void testMapper() throws IOException {
        Text value1 = new Text(" 7369 SMITH CLERK 7902 17-12-80 800 20");
        //Text value2 = new Text(" 10 ACCOUNTING NEW YORK");
        mapDriver.withInput(new LongWritable(), value1);
        mapDriver.withOutput(new Text("20"), new Text("1800"));
        mapDriver.runTest();
    }

    /**
     * Three salary records (800 + 1600 + 3000) plus one name record should
     * reduce to key "ACCOUNTING" and value "5400 1800 3".
     * FIX: the @Test annotation was commented out, leaving this test dead even
     * though its expectation matches the reducer's output.
     * NOTE(review): pairing key "20" with dname ACCOUNTING is inconsistent with
     * the sample data (dept 20 is RESEARCH) but harmless as a fixture.
     */
    @Test
    public void testReducer() throws IOException {
        List<Text> values = new ArrayList<Text>();
        values.add(new Text("2ACCOUNTING"));
        values.add(new Text("1800"));
        values.add(new Text("11600"));
        values.add(new Text("13000"));
        reduceDriver.withInput(new Text("20"), values);
        reduceDriver.withOutput(new Text("ACCOUNTING"), new Text("5400" + " " + "1800" + " " + "3"));
        reduceDriver.runTest();
    }
}