1.test0
从eclipse复制过来的wordcount,已经成功运行
/*从eclipse复制过来的wordcount,已经成功运行*/
package org.apache.hadoop.examples;
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class WordCount {
public WordCount() {
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration(); //获取环境变量
String[] otherArgs = (new GenericOptionsParser(conf, args)).getRemainingArgs();
//String[] otherArgs=new String[]{"input","output"};
if(otherArgs.length < 2) {
System.err.println("Usage: wordcount <in> [<in>...] <out>");
System.exit(2);
}
Job job = Job.getInstance(conf, "word count");//创建一个新的任务
job.setJarByClass(WordCount.class); //设置主要工作类
job.setMapperClass(WordCount.TokenizerMapper.class);//设置Mapper类
job.setCombinerClass(WordCount.IntSumReducer.class);
job.setReducerClass(WordCount.IntSumReducer.class);//设置Reduce类
job.setOutputKeyClass(Text.class); //设置输出key格式
job.setOutputValueClass(IntWritable.class);//设置输出value格式
for(int i = 0; i < otherArgs.length - 1; ++i) {
FileInputFormat.addInputPath(job, new Path(otherArgs[i]));//添加输入路径,当input里有多个文件
}
FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));//添加输出路径
System.exit(job.waitForCompletion(true)?0:1);//运行任务
}
//静态内部类
public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
private IntWritable result = new IntWritable();
public IntSumReducer() {
}
//Reduce方法定义
public void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
int sum = 0;//设置辅助求和值
IntWritable val;
for(Iterator i$ = values.iterator(); i$.hasNext(); sum += val.get()) {
val = (IntWritable)i$.next();
}
this.result.set(sum);
context.write(key, this.result);//重新将值写入
}
}
//静态内部类
public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
private static final IntWritable one = new IntWritable(1);
private Text word = new Text();
public TokenizerMapper() {
}
//Map方法定义
public void map(Object key, Text value, Mapper<Object, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString());//分割字符串
while(itr.hasMoreTokens()) {//获取内容
this.word.set(itr.nextToken());
context.write(this.word, one);//写入上下文
}
}
}
}
2.test1
计数示例 2:使用 ToolRunner 驱动的单词计数程序(运行后读取并打印输出结果)
package org.apache.hadoop.examples;
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/*public class test1 {
ToolRunnerTest
}*/
public class test1 extends Configured implements Tool { // NOTE(review): class name should be UpperCamelCase; kept for compatibility

    /**
     * Driver method: configures the word-count job over the fixed paths
     * "input/test" -> "output", deleting any stale output directory first.
     *
     * @return 0 if the job succeeded, 1 otherwise
     */
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        Job job = Job.getInstance(conf); // Job.getInstance replaces the deprecated new Job(conf)
        job.setJarByClass(getClass());
        FileSystem fs = FileSystem.get(conf);
        fs.delete(new Path("output"), true); // remove stale output so the job can re-run
        FileInputFormat.addInputPath(job, new Path("input/test"));
        FileOutputFormat.setOutputPath(job, new Path("output"));
        job.setMapperClass(TxtCounter.TxtMapper.class);
        job.setReducerClass(TxtCounter.TxtReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Propagate the job result instead of discarding it and always returning 0.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /** Reads the job output back and prints it to stdout in 128-byte chunks. */
    public static void print(Tool tool) throws Exception {
        FileSystem fs = FileSystem.get(tool.getConf());
        // NOTE(review): "output" is the output *directory*; confirm this resolves
        // to the part file on the target filesystem.
        Path path = new Path("output");
        // try-with-resources fixes the stream leak in the original (fsin was never closed).
        try (FSDataInputStream fsin = fs.open(path)) {
            byte[] buff = new byte[128];
            int length;
            while ((length = fsin.read(buff, 0, 128)) != -1) {
                System.out.println(new String(buff, 0, length));
            }
        }
    }

    /** Entry point: runs the driver via ToolRunner, then prints the result. */
    public static void main(String[] args) throws Exception {
        Tool tool = new test1();
        ToolRunner.run(tool, args);
        print(tool);
    }
}
class TxtCounter {// 计数类
static class TxtMapper extends Mapper<Object, Text, Text, IntWritable> {//Mapper类
//protected void Map(LongWritable key, Text value, Context context)
protected void Map(Object key, Text value, Context context)
throws java.io.IOException, InterruptedException {// Map实现方法
String[] strs = value.toString().split(" ");// 对内容获取
for (String str : strs) {// 获取内容
context.write(new Text(str), new IntWritable(1));// 将键值对写入上下文
}
};
}
static class TxtReducer extends Reducer<Text, IntWritable, Text, IntWritable> {//
protected void Reduce(Text key, Iterable<IntWritable> values, Context context)
throws java.io.IOException, InterruptedException {// Reduce实现方法
int sum = 0;// 辅助类型
Iterator<IntWritable> it = values.iterator();// 遍历数据集
while (it.hasNext()) {// 进行运算
IntWritable value = it.next();// 获取元素值
sum += value.get();// 进行求和
}
context.write(key, new IntWritable(sum));// 结果写入上下文
};
}
}
3.test2
/*
* 10.5.2自定义的ScoreWritable
对学生成绩进行分组,第一步就是实现自定义的 Writable类。
这里的学生成绩包含两个部分,分别是Text类型的学科名与Intwritable类型的成绩数。
因此,在构建自定义的Writable类型时可以自定义出具有以上两种类型的ScoreWritable。
其代码如下所示:
* */
class Scorewritable implements writableComparable<Scorewritable> {
Text first;// Text类型变量
Intwritable second;// Intwritable类型变量
public void set(Text first, Intwritable second) {// 相应的设置方法
this.first = first;// 设置第一个Text值
this.second = second;// 设置第二个Intwritable值
}
public Text getFirst() {// 返回第一个值
return first;// 返回值
}
public Intwritable getSecond() {// 返回第二个值
return second;// 返回值
}
@override
public void readFields(DataInput in) throws IOException { // 数据读取方法
first = new Text(in.readUTF());// 读取第一个值
second = new Intwritable(in.readInt());// 读取第二个值
}
public void write(Dataoutput out) throws IOException { // 数据写方法
out.writeUTF(first.toString());// 写出第一个数据
out.writeInt(second.get());// 写出第二个数据
}
@override
public boolean equals(object obj) {// 相应的equals方法
scorewritable temp = (scorewritable) obj;// 强制类型转换
return first.equals(temp.first) && second.equals(temp.second);// 返回比较值
}
@override
public int hashcode() {// 相应的Hashcode方法
return first.hashCode() * 163 + second.hashcode();// 获得hash值
}
@override
public int compareTo(Scorewritable o) {
if (this.first != o.getFirst()) {
// 对第一个值进行判断
return this.first.tostring().compareTo(o.first.tostring());
// 返回第一个值比较结果
} else if (this.second != o.getSecond()) {
// 对第二个值进行判断
return this.second.get() - o.getSecond().get();
// 返回第二个值比较结果
} else
return 0;
}
@override
public string tostring() {// tostring方法
return first.toString() + " :" + second.get();// 返回值
}
}
4.test3
/*程序10-8
* 使用姓名分组对数据进行处理的程序如程序10-8所示。
* */
package org.apache.hadoop.examples;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/*
public class StudentScore {
}
*/
/*程序10-8
* 使用姓名分组对数据进行处理的程序如程序10-8所示。
* */
public class StudentScore extends Configured implements Tool {

    /** Entry point: runs the driver via ToolRunner. */
    public static void main(String[] args) throws Exception {
        ToolRunner.run(new StudentScore(), args);
    }

    /**
     * Configures the score-grouping job: reads "student.txt", partitions
     * records by student name across 3 reducers (see StudentPartitioner),
     * and writes the grouped scores to "out".
     *
     * @return 0 if the job succeeded, 1 otherwise
     */
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        Job job = Job.getInstance(conf); // replaces the deprecated new Job(conf)
        job.setJarByClass(getClass());
        FileSystem fs = FileSystem.get(conf);
        fs.delete(new Path("out"), true); // remove stale output so the job can re-run
        FileInputFormat.addInputPath(job, new Path("student.txt"));
        FileOutputFormat.setOutputPath(job, new Path("out"));
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(ScoreWritable.class);
        job.setMapperClass(StudentMap.class);
        job.setNumReduceTasks(3); // one reducer per partition returned by StudentPartitioner
        job.setPartitionerClass(StudentPartitioner.class);
        job.setReducerClass(StudentReduce.class);
        // Propagate the job result instead of always returning 0.
        return job.waitForCompletion(true) ? 0 : 1;
    }
}
class ScoreWritable implements WritableComparable<ScoreWritable> {
    Text first;         // subject name
    IntWritable second; // score value

    /** Sets both components of the composite key. */
    public void set(Text first, IntWritable second) {
        this.first = first;
        this.second = second;
    }

    /** @return the subject-name component */
    public Text getFirst() {
        return first;
    }

    /** @return the score component */
    public IntWritable getSecond() {
        return second;
    }

    /** Deserializes the two fields in the same order write() emits them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        first = new Text(in.readUTF());
        second = new IntWritable(in.readInt());
    }

    /** Serializes the subject as modified-UTF, then the score as a raw int. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(first.toString());
        out.writeInt(second.get());
    }

    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof ScoreWritable)) {
            return false; // guard against ClassCastException on foreign types
        }
        ScoreWritable temp = (ScoreWritable) obj;
        return first.equals(temp.first) && second.equals(temp.second);
    }

    @Override
    public int hashCode() {
        return first.hashCode() * 163 + second.hashCode();
    }

    /** Orders by subject name, then by score; consistent with equals(). */
    @Override
    public int compareTo(ScoreWritable o) {
        // BUG FIX: the original used != (reference identity). Two distinct but
        // equal `first` objects took the string-comparison branch and returned 0
        // without ever comparing `second`, making compareTo inconsistent with
        // equals(). Compare by value instead.
        int cmp = first.toString().compareTo(o.first.toString());
        if (cmp != 0) {
            return cmp;
        }
        // Integer.compare avoids the overflow risk of the subtraction idiom.
        return Integer.compare(second.get(), o.getSecond().get());
    }

    @Override
    public String toString() {
        return first.toString() + ":" + second.get();
    }
}
class StudentPartitioner extends Partitioner<Text, ScoreWritable> {
    /** Routes records by student name: "lucy" -> 1, "snow" -> 2, anyone else -> 0. */
    @Override
    public int getPartition(Text key, ScoreWritable value, int numPartitions) {
        String name = key.toString();
        switch (name) {
            case "lucy":
                return 1;
            case "snow":
                return 2;
            default:
                return 0;
        }
    }
}
class StudentMap extends Mapper<LongWritable, Text, Text, ScoreWritable> {// 自定义Map类
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {// 自定义的Map方法
String[] strs = value.toString().split(" ");// 获得输入数据
Text keyy = new Text(strs[0]);// 获取姓名作为key
ScoreWritable valuee = new ScoreWritable();// 设定value类型
valuee.set(new Text(strs[1]), new IntWritable(Integer.parseInt(strs[2])));// 注入自定义的Scorewritable值
context.write(keyy, valuee);// 写入上下文
};
}
class StudentReduce extends Reducer<Text, ScoreWritable, Text, ScoreWritable> {
protected void reduce(Text key, Iterable<ScoreWritable> values, Context context)
throws IOException, InterruptedException {// 自定义的Reduce方法
for (ScoreWritable value : values) {// 迭代获取值
context.write(key, value);// 写入结果上下文
}
};
}