请使用MapReduce统计 维修换件清单1.csv中的每个设备,维修的换件数量和维修费用 ,并输出格式 为 设备名称,换件数量,维修费用。
一、思路:由于这个题目中要统计换件数量和维修费用两个字段,而 MapReduce 中 map 输出的 value 只能是一个对象,因此我们需要自定义一个实现 Writable 接口的 JavaBean 类,把这两个字段封装在一起。然后就是我们的常规操作了。
二:
数据集:
链接:https://pan.baidu.com/s/1gCJiGXX3HyRV3c-HSavA1w
提取码:gv3i
三:上代码
package ls;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class Bisai2 {
public static class Mymaper extends org.apache.hadoop.mapreduce.Mapper<LongWritable, Text, Text, JavaBean>{
protected void map(LongWritable key, Text value,
org.apache.hadoop.mapreduce.Mapper<LongWritable, Text, Text, JavaBean>.Context context)
throws IOException, InterruptedException {
String s[] = value.toString().split(",");
String a = s[1];
long shulaing=Long.parseLong(s[3]);
double feiyong =Double.parseDouble(s[4]);
context.write(new Text(a),new JavaBean(shulaing,feiyong));
}
}
public static class MyReduce extends org.apache.hadoop.mapreduce.Reducer<Text, JavaBean, Text, JavaBean>{
protected void reduce(Text k2, Iterable<JavaBean> v2s,
org.apache.hadoop.mapreduce.Reducer<Text, JavaBean, Text, JavaBean>.Context context)
throws IOException, InterruptedException {
long shulaings = 0L;
double feiyongs = 0;
for (JavaBean d:v2s) {
shulaings+=d.getShulaing();
feiyongs+=d.getFeiyong();
}
context.write(k2, new JavaBean(shulaings,feiyongs));
}
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
//设置参数
Configuration conf =new Configuration();
//创建任务
Job job =Job.getInstance(conf,Bisai2.class.getSimpleName() );
//指定jar文件
job.setJarByClass(Bisai2.class);
//指定输入路径
FileInputFormat.addInputPath(job,new Path(args[0]));
//指定map类以及key和value的输出类型
job.setMapperClass(Mymaper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(JavaBean.class);
//指定reduce类以及key和value的输出类型
job.setReducerClass(MyReduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(JavaBean.class);
//输出路径
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
}
package ls;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
public class JavaBean implements Writable {
long shulaing;
double feiyong;
public long getShulaing() {
return shulaing;
}
public void setShulaing(long shulaing) {
this.shulaing = shulaing;
}
public double getFeiyong() {
return feiyong;
}
public void setFeiyong(double feiyong) {
this.feiyong = feiyong;
}
public JavaBean() {
super();
}
public JavaBean(long shulaing,double feiyong) {
this.shulaing = shulaing;
this.feiyong=feiyong;
}
public void write(DataOutput out) throws IOException {
out.writeLong(this.shulaing);//writeUTF 是对字符串
out.writeDouble(this.feiyong);
}
public void readFields(DataInput in) throws IOException {
this.shulaing=in.readLong();
this.feiyong=in.readDouble();
}
public String toString() {
return " "+ this.shulaing + "\t" + this.feiyong +"\t" ;
}
}
四:运行查看结果
五:注意:这里额外编写了 JavaBean 封装类,那么应该怎么打包运行呢?
我们需要对整个 ls 包打包,而不仅仅是 Bisai2 一个类。打完包后,运行时填写的主类名应当是包含 main 方法的那个类,因此这里填写 Bisai2 的完全限定类名。