1.test0
从eclipse复制过来的wordcount,已经成功运行
/*从eclipse复制过来的wordcount,已经成功运行*/
package org.apache.hadoop.examples;
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class WordCount {
public WordCount() {
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration(); //获取环境变量
String[] otherArgs = (new GenericOptionsParser(conf, args)).getRemainingArgs();
//String[] otherArgs=new String[]{"input","output"};
if(otherArgs.length < 2) {
System.err.println("Usage: wordcount <in> [<in>...] <out>");
System.exit(2);
}
Job job = Job.getInstance(conf, "word count");//创建一个新的任务
job.setJarByClass(WordCount.class); //设置主要工作类
job.setMapperClass(WordCount.TokenizerMapper.class);//设置Mapper类
job.setCombinerClass(WordCount.IntSumReducer.class);
job.setReducerClass(WordCount.IntSumReducer.class);//设置Reduce类
job.setOutputKeyClass(Text.class); //设置输出key格式
job.setOutputValueClass(IntWritable.class);//设置输出value格式
for(int i = 0; i < otherArgs.length - 1; ++i) {
FileInputFormat.addInputPath(job, new Path(otherArgs[i]));//添加输入路径,当input里有多个文件
}
FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));//添加输出路径
System.exit(job.waitForCompletion(true)?0:1);//运行任务
}
//静态内部类
public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
private IntWritable result = new IntWritable();
public IntSumReducer() {
}
//Reduce方法定义
public void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
int sum = 0;//设置辅助求和值
IntWritable val;
for(Iterator i$ = values.iterator(); i$.hasNext(); sum += val.get()) {
val = (IntWritable)i$.next();
}
this.result.set(sum);
context.write(key, this.result);//重新将值写入
}
}
//静态内部类
public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
private static final IntWritable one = new IntWritable(1);
private Text word = new Text();
public TokenizerMapper() {
}
//Map方法定义
public void map(Object key, Text value, Mapper<Object, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString());//分割字符串
while(itr.hasMoreTokens()) {//获取内容
this.word.set(itr.nextToken());
context.write(this.word, one);//写入上下文
}
}
}
}
2.test1
计数示例 2:使用 ToolRunner 驱动的单词计数程序(运行后读取并打印输出结果)
package org.apache.hadoop.examples;
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/*public class test1 {
ToolRunnerTest
}*/
public class test1 extends Configured implements Tool { // NOTE(review): class name should be UpperCamelCase; kept for compatibility

    /**
     * Driver method: configures the word-count job over the fixed paths
     * "input/test" -> "output", deleting any stale output directory first.
     *
     * @return 0 if the job succeeded, 1 otherwise
     */
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        Job job = Job.getInstance(conf); // Job.getInstance replaces the deprecated new Job(conf)
        job.setJarByClass(getClass());
        FileSystem fs = FileSystem.get(conf);
        fs.delete(new Path("output"), true); // remove stale output so the job can re-run
        FileInputFormat.addInputPath(job, new Path("input/test"));
        FileOutputFormat.setOutputPath(job, new Path("output"));
        job.setMapperClass(TxtCounter.TxtMapper.class);
        job.setReducerClass(TxtCounter.TxtReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Propagate the job result instead of discarding it and always returning 0.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /** Reads the job output back and prints it to stdout in 128-byte chunks. */
    public static void print(Tool tool) throws Exception {
        FileSystem fs = FileSystem.get(tool.getConf());
        // NOTE(review): "output" is the output *directory*; confirm this resolves
        // to the part file on the target filesystem.
        Path path = new Path("output");
        // try-with-resources fixes the stream leak in the original (fsin was never closed).
        try (FSDataInputStream fsin = fs.open(path)) {
            byte[] buff = new byte[128];
            int length;
            while ((length = fsin.read(buff, 0, 128)) != -1) {
                System.out.println(new String(buff, 0, length));
            }
        }
    }

    /** Entry point: runs the driver via ToolRunner, then prints the result. */
    public static void main(String[] args) throws Exception {
        Tool tool = new test1();
        ToolRunner.run(tool, args);
        print(tool);
    }
}
class TxtCounter {// 计数类
static class TxtMapper extends Mapper<Object, Text, Text, IntWritable> {//Mapper类
//protected void Map(LongWritable key, Text value, Context context)
protected void Map(Object key, Text value, Context context)
throws java.io.IOException, InterruptedException {// Map实现方法
String[] strs = value.toString().split(" ");// 对内容获取
for (String str : strs) {// 获取内容
context.write(new Text(str), new IntWritable(1));// 将键值对写入上下文
}
};
}
static class TxtReducer extends Reducer<Text, IntWritable, Text, IntWritable> {//
protected void Reduce(Text key, Iterable<IntWritable> values, Context context)
throws java.io.IOException, InterruptedException {// Reduce实现方法
int sum = 0;// 辅助类型
Iterator<IntWritable> it = values.iterator();// 遍历数据集
while (it.hasNext()) {// 进行运算
IntWritable value = it.next();// 获取元素值
sum += value.get();// 进行求和
}
context.write(key, new IntWritable(sum));// 结果写入上下文
};
}
}
3.test2
/*
* 10.5.2自定义的ScoreWritable
对学生成绩进行分组,第一步就是实现自定义的 Writable类。
这里的学生成绩包含两个部分,分别是Text类型的学科名与Intwritable类型的成绩数。
因此,在构建自定义的Writable类型时可以自定义出具有以上两种类型的ScoreWritable。
其代码如下所示:
* */
class Scorewritable implements writableComparable<Scorewritable> {
Text first;// Text类型变量
Intwritable second;// Intwritable类型变量
public void set(Text first, Intwritable second) {// 相应的设置方法
this.first = first;// 设置第一个Text值
this.second = second;// 设置第二个Intwritable值
}
public Text getFirst() {// 返回第一个值
return first;// 返回值
}
public Intwritable getSecond() {// 返回第二个值
return second;// 返回值
}
@override
public void readFields(DataInput in) throws IOException { // 数据读取方法
first = new Text(in.readUTF());// 读取第一个值
second = new Intwritable(in.readInt());// 读取第二个值
}
public void write(Dataoutput out) throws IOException { // 数据写方法
out.writeUTF(first.toString());// 写出第一个数据
out.writeInt(second.get());// 写出第二个数据
}
@override
public boolean equals(object obj) {// 相应的equals方法
scorewritable temp = (scorewritable) obj;// 强制类型转换
return first.equals(temp.first) && second.equals(temp.second);// 返回比较值
}
@override
public int hashcode() {// 相应的Hashcode方法
return first.hashCode() * 163 + second.hashcode();// 获得hash值
}
@override
public int compareTo(Scorewritable o) {
if (this.first != o.getFirst()) {
// 对第一个值进行判断
return this.first.tostring().compareTo(o.first.tostring());
// 返回第一个值比较结果
} else if (this.second != o.getSecond()) {
// 对第二个值进行判断
return this.second.get() - o.getSecond().get();
// 返回第二个值比较结果
} else
return 0;
}
@override
public string tostring() {// tostring方法
return first.toString() + " :" + second.get();// 返回值
}
}
4.test3
/*程序10-8
* 使用姓名分组对数据进行处理的程序如程序10-8所示。
* */
package org.apache.hadoop.examples;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/*
public class StudentScore {
}
*/
/*程序10-8
* 使用姓名分组对数据进行处理的程序如程序10-8所示。
* */
public class StudentScore extends Configured implements Tool {

    /** Entry point: runs the driver via ToolRunner. */
    public static void main(String[] args) throws Exception {
        ToolRunner.run(new StudentScore(), args);
    }

    /**
     * Configures the score-grouping job: reads "student.txt", partitions
     * records by student name across 3 reducers (see StudentPartitioner),
     * and writes the grouped scores to "out".
     *
     * @return 0 if the job succeeded, 1 otherwise
     */
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        Job job = Job.getInstance(conf); // replaces the deprecated new Job(conf)
        job.setJarByClass(getClass());
        FileSystem fs = FileSystem.get(conf);
        fs.delete(new Path("out"), true); // remove stale output so the job can re-run
        FileInputFormat.addInputPath(job, new Path("student.txt"));
        FileOutputFormat.setOutputPath(job, new Path("out"));
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(ScoreWritable.class);
        job.setMapperClass(StudentMap.class);
        job.setNumReduceTasks(3); // one reducer per partition returned by StudentPartitioner
        job.setPartitionerClass(StudentPartitioner.class);
        job.setReducerClass(StudentReduce.class);
        // Propagate the job result instead of always returning 0.
        return job.waitForCompletion(true) ? 0 : 1;
    }
}
class ScoreWritable implements WritableComparable<ScoreWritable> {
    Text first;         // subject name
    IntWritable second; // score value

    /** Sets both components of the composite key. */
    public void set(Text first, IntWritable second) {
        this.first = first;
        this.second = second;
    }

    /** @return the subject-name component */
    public Text getFirst() {
        return first;
    }

    /** @return the score component */
    public IntWritable getSecond() {
        return second;
    }

    /** Deserializes the two fields in the same order write() emits them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        first = new Text(in.readUTF());
        second = new IntWritable(in.readInt());
    }

    /** Serializes the subject as modified-UTF, then the score as a raw int. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(first.toString());
        out.writeInt(second.get());
    }

    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof ScoreWritable)) {
            return false; // guard against ClassCastException on foreign types
        }
        ScoreWritable temp = (ScoreWritable) obj;
        return first.equals(temp.first) && second.equals(temp.second);
    }

    @Override
    public int hashCode() {
        return first.hashCode() * 163 + second.hashCode();
    }

    /** Orders by subject name, then by score; consistent with equals(). */
    @Override
    public int compareTo(ScoreWritable o) {
        // BUG FIX: the original used != (reference identity). Two distinct but
        // equal `first` objects took the string-comparison branch and returned 0
        // without ever comparing `second`, making compareTo inconsistent with
        // equals(). Compare by value instead.
        int cmp = first.toString().compareTo(o.first.toString());
        if (cmp != 0) {
            return cmp;
        }
        // Integer.compare avoids the overflow risk of the subtraction idiom.
        return Integer.compare(second.get(), o.getSecond().get());
    }

    @Override
    public String toString() {
        return first.toString() + ":" + second.get();
    }
}
class StudentPartitioner extends Partitioner<Text, ScoreWritable> {
    /** Routes records by student name: "lucy" -> 1, "snow" -> 2, anyone else -> 0. */
    @Override
    public int getPartition(Text key, ScoreWritable value, int numPartitions) {
        String name = key.toString();
        switch (name) {
            case "lucy":
                return 1;
            case "snow":
                return 2;
            default:
                return 0;
        }
    }
}
class StudentMap extends Mapper<LongWritable, Text, Text, ScoreWritable> {// 自定义Map类
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {// 自定义的Map方法
String[] strs = value.toString().split(" ");// 获得输入数据
Text keyy = new Text(strs[0]);// 获取姓名作为key
ScoreWritable valuee = new ScoreWritable();// 设定value类型
valuee.set(new Text(strs[1]), new IntWritable(Integer.parseInt(strs[2])));// 注入自定义的Scorewritable值
context.write(keyy, valuee);// 写入上下文
};
}
class StudentReduce extends Reducer<Text, ScoreWritable, Text, ScoreWritable> {
protected void reduce(Text key, Iterable<ScoreWritable> values, Context context)
throws IOException, InterruptedException {// 自定义的Reduce方法
for (ScoreWritable value : values) {// 迭代获取值
context.write(key, value);// 写入结果上下文
}
};
}