目录
待处理数据内容:
名字 | 语文 | 数学 | 英语 |
lh | 92 | 68 | 70 |
zyt | 94 | 88 | 75 |
ls | 96 | 78 | 78 |
hgw | 90 | 70 | 56 |
yxx | 80 | 88 | 73 |
hz | 90 | 98 | 70 |
xyd | 60 | 88 | 73 |
hj | 90 | 58 | 70 |
cs | 50 | 58 | 11 |
计算每个人的平均成绩
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Computes each student's average score across all subject columns.
 * Input lines are tab-separated: name, then one score per subject.
 * Map-only job: emits "name -> average"; no reducer is configured.
 */
public class AvgDemo {

    /** Mapper: parses one tab-separated line and emits (name, average). */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

        // Reused output objects to avoid a per-record allocation.
        private final Text k = new Text();
        private final Text v = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split("\t");
            // Need a name plus at least one score column.
            if (fields.length < 2) {
                return;
            }
            // Generalized: sum every score column instead of hard-coding three.
            // The original summed exactly fields[1..3] yet divided by
            // (fields.length - 1), which disagreed for any other column count.
            double sum = 0;
            try {
                for (int i = 1; i < fields.length; i++) {
                    sum += Integer.parseInt(fields[i].trim());
                }
            } catch (NumberFormatException e) {
                // Skip header or malformed lines instead of failing the task.
                return;
            }
            double avg = sum / (fields.length - 1);
            k.set(fields[0]);
            v.set(avg + "");
            context.write(k, v);
        }
    }

    /**
     * Job driver.
     * @param args args[0] = input path, args[1] = output path
     */
    public static void main(String[] args) {
        try {
            // 1. Configuration and job creation.
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "model01");
            job.setJarByClass(AvgDemo.class);
            // 2. Map settings (map-only job: no reducer class is set).
            job.setMapperClass(MyMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            // 3. Submit and exit with the job's status.
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (IOException | ClassNotFoundException | InterruptedException e) {
            e.printStackTrace();
        }
    }
}
求每个学科的平均成绩
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
每个学科的平均成绩?
语文 数学 英语
76 89 90
* @author lyd
*
*/
public class AvgDemo02 {
//自定义myMapper
public static class MyMapper extends Mapper<LongWritable, Text, Text, Text>{
//只在map方法运行之前执行一次。(仅执行一次)
@Override
protected void setup(Context context)
throws IOException, InterruptedException {
}
Text k = new Text();
Text v = new Text();
@Override
protected void map(LongWritable key, Text value,Context context)
throws IOException, InterruptedException {
String line = value.toString();
String scores [] = line.split("\t");
String chinese = scores[1];
String math = scores[2];
String english = scores[3];
k.set("_");
v.set(chinese+"_"+math+"_"+english);
context.write(k, v);
}
//map方法运行完后执行一次(仅执行一次)
@Override
protected void cleanup(Context context)
throws IOException, InterruptedException {
}
}
//自定义myReducer
public static class MyReducer extends Reducer<Text, Text, Text, Text>{
//在reduce方法执行之前执行一次。(仅一次)
@Override
protected void setup(Context context)
throws IOException, InterruptedException {
context.write(new Text("语文"+"\t"+"数学"+"\t"+"英语"), new Text(""));
}
@Override
protected void reduce(Text key, Iterable<Text> value,Context context)
throws IOException, InterruptedException {
int counter = 0;
double c = 0;
double m = 0;
double e = 0;
for (Text t : value) {
String scores [] = t.toString().split("_");
c += Double.parseDouble(scores[0]);
m += Double.parseDouble(scores[1]);
e += Double.parseDouble(scores[2]);
counter ++;
}
context.write(new Text(c/counter+"\t"+m/counter+"\t"+e/counter), new Text(""));
}
//在reduce方法执行之后执行一次。(仅一次)
@Override
protected void cleanup(Context context)
throws IOException, InterruptedException {
}
}
/**
* job的驱动方法
* @param args
*/
public static void main(String[] args) {
try {
//1、获取Conf
Configuration conf = new Configuration();
//2、创建job
Job job = Job.getInstance(conf, "model01");
//3、设置运行job的class
job.setJarByClass(AvgDemo02.class);
//4、设置map相关属性
job.setMapperClass(MyMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
//5、设置reduce相关属性
job.setReducerClass(MyReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileOutputFormat.setOutputPath(job, new Path(args[1]));
//6、提交运行job
int isok = job.waitForCompletion(true) ? 0 : 1;
//退出
System.exit(isok);
} catch (IOException | ClassNotFoundException | InterruptedException e) {
e.printStackTrace();
}
}
}
总平均分每个分数段的人数以及百分比
分数段 | 人数 | 占总数的百分比 |
<60 | 1 | 8% |
60-70 | 2 | 16% |
70-80 | 5 | 33% |
80-90 | 2 | 16% |
90-100 | 3 | 28% |
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
<60 1 8%
60-70 2 16%
70-80 5 33%
80-90 2 16%
90-100 3 28%
* @author lyd
*
*/
/**
 * Buckets each student's average score into grade bands and reports, per band,
 * the head count and its percentage of all students.
 */
public class AvgDemo03 {

    /** Mapper: computes the per-student average and emits (band, "1"). */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

        private final Text k = new Text();
        private final Text v = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] scores = value.toString().split("\t");
            double avg = (Double.parseDouble(scores[1])
                    + Double.parseDouble(scores[2])
                    + Double.parseDouble(scores[3])) / (scores.length - 1);
            // Pick the grade band for this average.
            String band;
            if (avg < 60) {
                band = "<60";
            } else if (avg < 70) {
                band = "60-70";
            } else if (avg < 80) {
                band = "70-80";
            } else if (avg < 90) {
                band = "80-90";
            } else if (avg <= 100) {
                band = "90-100";
            } else {
                // Bug fix: the original called context.write() outside the
                // if-chain, so an average outside every band re-emitted the
                // previous record's stale key/value. Skip such records instead.
                return;
            }
            k.set(band);
            v.set("1");
            context.write(k, v);
        }
    }

    /**
     * Reducer: counts students per band, then emits all rows with percentages
     * in cleanup(). NOTE: state spans reduce() calls, so the totals are only
     * correct when the job runs with a single reducer (the default here).
     */
    public static class MyReducer extends Reducer<Text, Text, Text, Text> {

        // Total number of students seen across every reduce() call.
        private int totalPerson = 0;
        // One "band_count" entry per reduce() call, replayed in cleanup().
        private final List<String> li = new ArrayList<String>();

        @Override
        protected void setup(Context context)
                throws IOException, InterruptedException {
            // Header row, written once before the data rows.
            context.write(new Text("分数段"), new Text("人数" + "\t" + "百分比"));
        }

        @Override
        protected void reduce(Text key, Iterable<Text> value, Context context)
                throws IOException, InterruptedException {
            // The mapper only emits the five known bands, so every value in
            // this group counts toward this band. (The original per-key
            // if/else chain incremented the same counter in every branch and
            // was pure redundancy.)
            int i = 0;
            for (Text t : value) {
                i++;
                totalPerson++;
            }
            li.add(key.toString() + "_" + i);
        }

        @Override
        protected void cleanup(Context context)
                throws IOException, InterruptedException {
            // Percentages need the grand total, which is only known after all
            // reduce() calls — hence the deferred output.
            for (String s : li) {
                String[] l = s.split("_");
                context.write(new Text(l[0]),
                        new Text(l[1] + "\t" + Double.parseDouble(l[1]) / totalPerson * 100 + "%"));
            }
        }
    }

    /**
     * Job driver.
     * @param args args[0] = input path, args[1] = output path
     */
    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "model01");
            job.setJarByClass(AvgDemo03.class);
            // Map side.
            job.setMapperClass(MyMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            // Reduce side.
            job.setReducerClass(MyReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            // Submit and exit with the job's status.
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (IOException | ClassNotFoundException | InterruptedException e) {
            e.printStackTrace();
        }
    }
}
将三门课程中任意一门不及格的学生过滤出来
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
将三门课程中任意一门不及格的学生过滤出来?
* @author lyd
*
*/
/**
 * Filters out every student who failed (scored below 60 in) at least one of
 * the three subjects. Map-only job: matching input lines pass through verbatim.
 */
public class GrepDemo {

    /**
     * Mapper: keeps any tab-separated input line in which at least one of the
     * three subject scores is below the passing mark of 60.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] cols = value.toString().split("\t");
            boolean failedAny = Double.parseDouble(cols[1]) < 60
                    || Double.parseDouble(cols[2]) < 60
                    || Double.parseDouble(cols[3]) < 60;
            if (failedAny) {
                // Emit the whole original line as the key; empty value.
                context.write(value, new Text(""));
            }
        }
    }

    /**
     * Job driver.
     * @param args args[0] = input path, args[1] = output path
     */
    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "model01");
            job.setJarByClass(GrepDemo.class);
            // Map-only job: no reducer class is configured.
            job.setMapperClass(MyMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            // Submit and exit with the job's status.
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (IOException | ClassNotFoundException | InterruptedException e) {
            e.printStackTrace();
        }
    }
}
统计成材率
每一门成绩都大于60分的人数/总人数
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
统计成材率?每一门成绩都大于60分的人数/总人数
成材率 88%
留级率 12%
* @author lyd
*
*/
/**
 * Computes the "success" rate — students passing every subject (>= 60)
 * divided by the total number of students — plus the complementary
 * repeat-year rate.
 */
public class SuccessDemo {

    /** Mapper: classifies each student as "up" (all subjects >= 60) or "down". */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] scores = value.toString().split("\t");
            boolean passedAll = Double.parseDouble(scores[1]) >= 60
                    && Double.parseDouble(scores[2]) >= 60
                    && Double.parseDouble(scores[3]) >= 60;
            context.write(new Text(passedAll ? "up" : "down"), new Text("1"));
        }
    }

    /**
     * Reducer: tallies "up" vs. total and emits the two rates in cleanup().
     * State spans reduce() calls, so the job must run with a single reducer
     * (the default) for the totals to be correct.
     */
    public static class MyReducer extends Reducer<Text, Text, Text, Text> {

        private int totalPerson = 0; // all students seen
        private int u = 0;           // students who passed every subject

        @Override
        protected void setup(Context context)
                throws IOException, InterruptedException {
            // NOTE(review): this header looks copy-pasted from the score-band
            // job (AvgDemo03); kept as-is to preserve the existing output.
            context.write(new Text("分数段"), new Text("人数" + "\t" + "百分比"));
        }

        @Override
        protected void reduce(Text key, Iterable<Text> value, Context context)
                throws IOException, InterruptedException {
            for (Text t : value) {
                if (key.toString().equals("up")) {
                    u++;
                }
                totalPerson++;
            }
        }

        @Override
        protected void cleanup(Context context)
                throws IOException, InterruptedException {
            // Guard against empty input, which would otherwise print "NaN%".
            if (totalPerson > 0) {
                context.write(new Text("成才率"), new Text(u * 100.0 / totalPerson + "%"));
                context.write(new Text("留级率"),
                        new Text((totalPerson - u) * 100.0 / totalPerson + "%"));
            }
        }
    }

    /**
     * Job driver.
     * @param args args[0] = input path, args[1] = output path
     */
    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://hadoop01:9000");
            Job job = Job.getInstance(conf, "model01");
            job.setJarByClass(SuccessDemo.class);
            // Map side.
            job.setMapperClass(MyMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            // Reduce side.
            job.setReducerClass(MyReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            // Delete the output directory if it already exists.
            FileSystem fs = FileSystem.get(conf);
            Path out = new Path(args[1]);
            if (fs.exists(out)) {
                // Bug fix: the original checked args[1] but deleted args[2],
                // so the real output directory was never cleaned and the job
                // would fail (or an unrelated path could be removed).
                fs.delete(out, true);
            }
            FileOutputFormat.setOutputPath(job, out);
            // Submit and exit with the job's status.
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (IOException | ClassNotFoundException | InterruptedException e) {
            e.printStackTrace();
        }
    }
}