MapReduce Basics Test (1)
Field description: store name, revenue, expenses, year
劲松店,600,350,2019年
劲松店,800,250,2020年
王府井店,1900,600,2020年
王府井店,2000,900,2019年
回龙观店,6700,1800,2020年
西单店,3000,1000,2019年
西单店,5000,1000,2020年
,3500,1000,2020年
牡丹园店,3800,1400,2020
牡丹园店,2800,1300,2019年
西直门店,1500,900,2019年
太阳宫店,9000,3600,2019年
三里屯店,,1000,2020年
西直门店,3500,1000,2020年
太阳宫店,6000,4600,2020年
回龙观店,7500,2000,2019年
Requirement 1: remove the records with missing fields from the source file
Requirement 2: split the business records into different files by year
Requirement 3: sort each year's business records by net profit (revenue - expenses)
Requirement 4: separate the fields written to the final output file with '\t', and append two extra fields: the net profit and a profit/loss flag
For example:
王府井店 1900 600 2020年 1300 盈利
劲松店 800 950 2020年 -150 亏损
Preparation
Create a test26.txt file and store the data above in it
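Before running the jobs, the sample records have to be on disk at the path that Test01 reads. The snippet below is only a minimal sketch, assuming the same local Windows path that is hardcoded in the code (C:\Users\User\Desktop\input\test26.txt); creating the file by hand works just as well.
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;
public class WriteTestData {
    public static void main(String[] args) throws IOException {
        // The sample records from the table above; the dirty rows are kept on purpose
        List<String> lines = Arrays.asList(
                "劲松店,600,350,2019年",
                "劲松店,800,250,2020年",
                "王府井店,1900,600,2020年",
                ",3500,1000,2020年",
                "三里屯店,,1000,2020年"
                // ... the remaining records from the table above ...
        );
        // Assumed local path; it must match the input path used in Test01
        Files.createDirectories(Paths.get("C:\\Users\\User\\Desktop\\input"));
        Files.write(Paths.get("C:\\Users\\User\\Desktop\\input\\test26.txt"),
                lines, StandardCharsets.UTF_8);
    }
}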
Implementation code for requirement 1:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
/**
 * Field description: store name, revenue, expenses, year
 * Requirement 1: remove the records with missing fields from the source file
 *
 */
public class Test01 extends Configured implements Tool {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
int status = ToolRunner.run(conf, new Test01(), args);
System.exit(status);
}
public int run(String[] args) throws Exception {
Job job = Job.getInstance(this.getConf(), "test26");
job.setJarByClass(Test01.class);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.setInputPaths(job, new Path("C:\\Users\\User\\Desktop\\input\\test26.txt"));
job.setMapperClass(MapWordCount.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
// job.setPartitionerClass(HashPartitioner.class);
// job.setSortComparatorClass(null);
// job.setGroupingComparatorClass(null);
// job.setCombinerClass(null);
// job.setReducerClass(ReduceWordCount.class);
// job.setOutputKeyClass(Text.class);
// job.setOutputValueClass(IntWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
Path path = new Path("C:\\Users\\User\\Desktop\\outTest26");
FileSystem fs = FileSystem.get(this.getConf());
if (fs.exists(path)) {
fs.delete(path, true);
}
TextOutputFormat.setOutputPath(job, path);
//job.setNumReduceTasks(1);
return job.waitForCompletion(true) ? 0 : -1;
}
public static class MapWordCount extends Mapper<LongWritable, Text, Text, NullWritable> {
Text outputKey = new Text();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] fields = value.toString().split(",");
// Keep the record only when all four fields are present and non-empty;
// checking the length first also protects against rows with fewer than four fields
if (fields.length == 4 && !fields[0].isEmpty() && !fields[1].isEmpty()
        && !fields[2].isEmpty() && !fields[3].isEmpty()) {
    outputKey.set(fields[0] + " " + fields[1] + " " + fields[2] + " " + fields[3]);
    context.write(outputKey, NullWritable.get());
}
}
}
}
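A possible refinement, not in the original code: a Hadoop counter can report how many dirty records were dropped, which makes it easy to verify requirement 1 from the job summary that waitForCompletion(true) prints. A sketch of the map() method with such a counter, assuming the split-once version shown above:
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    String[] fields = value.toString().split(",");
    if (fields.length == 4 && !fields[0].isEmpty() && !fields[1].isEmpty()
            && !fields[2].isEmpty() && !fields[3].isEmpty()) {
        outputKey.set(fields[0] + " " + fields[1] + " " + fields[2] + " " + fields[3]);
        context.write(outputKey, NullWritable.get());
    } else {
        // Count every record that is filtered out; the total appears in the job counters
        context.getCounter("DataClean", "DroppedRecords").increment(1);
    }
}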
Implementation code for requirement 2:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
/**
 * Field description: store name, revenue, expenses, year
 *
 * Requirement 2: split the business records into different files by year
 */
public class Test02 extends Configured implements Tool {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
int status = ToolRunner.run(conf, new Test02(), args);
System.exit(status);
}
public int run(String[] args) throws Exception {
Job job = Job.getInstance(this.getConf(), "test26");
job.setJarByClass(Test02.class);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.setInputPaths(job, new Path("C:\\Users\\User\\Desktop\\outTest26"));
job.setMapperClass(MapWordCount.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
job.setPartitionerClass(MRPartition.class);
// job.setSortComparatorClass(null);
// job.setGroupingComparatorClass(null);
// job.setCombinerClass(null);
job.setReducerClass(ReduceWordCount.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
Path path = new Path("C:\\Users\\User\\Desktop\\outTest2602");
FileSystem fs = FileSystem.get(this.getConf());
if (fs.exists(path)) {
fs.delete(path, true);
}
TextOutputFormat.setOutputPath(job, path);
job.setNumReduceTasks(2);
return job.waitForCompletion(true) ? 0 : -1;
}
public static class MapWordCount extends Mapper<LongWritable, Text, Text, NullWritable> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
context.write(value, NullWritable.get());
}
}
public static class MRPartition extends Partitioner<Text,NullWritable>{
@Override
public int getPartition(Text k2, NullWritable v2, int i) {
String year = k2.toString().split("\\s+")[3];
if ("2019年".equals(year)){
return 0;
}else {
return 1;
}
}
}
public static class ReduceWordCount extends Reducer<Text, NullWritable, Text, NullWritable> {
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
context.write(key,NullWritable.get());
}
}
}
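MRPartition hardcodes partition 0 for 2019年 and sends everything else to partition 1, which matches setNumReduceTasks(2) but would silently lump any other year together with 2020年. A hedged alternative (an assumption, not part of the original solution) is to hash the year so the partitioner works for any number of years and reduce tasks; the trade-off is that hashing no longer guarantees exactly one year per output file if two years land in the same partition.
public static class YearHashPartition extends Partitioner<Text, NullWritable> {
    @Override
    public int getPartition(Text k2, NullWritable v2, int numReduceTasks) {
        // Partition by the year field (index 3); mask the sign bit so the result is never negative
        String year = k2.toString().split("\\s+")[3];
        return (year.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
    }
}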
Implementation code for requirement 3:
1. Create the JavaBean
Analysis: add one extra field for the net profit
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
public class Bean implements WritableComparable<Bean> {
private String first;
private String second;
private String three;
private String years;
private String money;
public Bean() {
}
public String getFirst() {
return first;
}
public void setFirst(String first) {
this.first = first;
}
public String getSecond() {
return second;
}
public void setSecond(String second) {
this.second = second;
}
public String getThree() {
return three;
}
public void setThree(String three) {
this.three = three;
}
public String getYears() {
return years;
}
public void setYears(String years) {
this.years = years;
}
public String getMoney() {
return money;
}
public void setMoney(String money) {
this.money = money;
}
@Override
public String toString() {
return first + "\t" + second +"\t"+ three +"\t"+ years+"\t"+money ;
}
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeUTF(first);
dataOutput.writeUTF(second);
dataOutput.writeUTF(three);
dataOutput.writeUTF(years);
dataOutput.writeUTF(money);
}
public void setAll(String first,String second,String three,String years,String money){
this.setFirst(first);
this.setSecond(second);
this.setThree(three);
this.setYears(years);
this.setMoney(money);
}
public void readFields(DataInput dataInput) throws IOException {
this.first = dataInput.readUTF();
this.second = dataInput.readUTF();
this.three = dataInput.readUTF();
this.years = dataInput.readUTF();
this.money=dataInput.readUTF();
}
@Override
public int compareTo(Bean o) {
    // Group records by year first so that each year's data sorts together
    int comp = this.years.compareTo(o.years);
    if (comp != 0) {
        return comp;
    }
    // Within the same year, sort by net profit in descending order
    comp = -Integer.valueOf(this.money).compareTo(Integer.valueOf(o.money));
    if (comp != 0) {
        return comp;
    }
    // Tie-break on the store name so distinct records with equal profit are not merged into one reduce group
    return this.first.compareTo(o.first);
}
}
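A quick standalone check (a sketch for local testing only, not part of the MapReduce job) of the ordering that compareTo defines: records group by year, and within a year they sort by net profit in descending order. The sample values are taken from the data above.
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class BeanSortCheck {
    public static void main(String[] args) {
        Bean a = new Bean(); a.setAll("王府井店", "1900", "600", "2020年", "1300");
        Bean b = new Bean(); b.setAll("劲松店", "800", "250", "2020年", "550");
        Bean c = new Bean(); c.setAll("西单店", "3000", "1000", "2019年", "2000");
        List<Bean> beans = new ArrayList<Bean>();
        beans.add(a); beans.add(b); beans.add(c);
        Collections.sort(beans); // uses Bean.compareTo
        for (Bean bean : beans) {
            // Expected order: the 2019年 record first, then the 2020年 records with 1300 before 550
            System.out.println(bean);
        }
    }
}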
2. Driver code
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
/**
 * Field description: store name, revenue, expenses, year
 *
 * Requirement 3: sort each year's business records by net profit (revenue - expenses)
 */
public class Test03 extends Configured implements Tool {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
int status = ToolRunner.run(conf, new Test03(), args);
System.exit(status);
}
public int run(String[] args) throws Exception {
Job job = Job.getInstance(this.getConf(), "test26");
job.setJarByClass(Test03.class);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.setInputPaths(job, new Path("C:\\Users\\User\\Desktop\\outTest26"));
job.setMapperClass(MapWordCount.class);
job.setMapOutputKeyClass(Bean.class);
job.setMapOutputValueClass(NullWritable.class);
// job.setPartitionerClass(HashPartitioner.class);
// job.setSortComparatorClass(null);
// job.setGroupingComparatorClass(null);
// job.setCombinerClass(null);
job.setReducerClass(ReduceWordCount.class);
job.setOutputKeyClass(Bean.class);
job.setOutputValueClass(NullWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
Path path = new Path("C:\\Users\\User\\Desktop\\outTest2603");
FileSystem fs = FileSystem.get(this.getConf());
if (fs.exists(path)) {
fs.delete(path, true);
}
TextOutputFormat.setOutputPath(job, path);
//job.setNumReduceTasks(1);
return job.waitForCompletion(true) ? 0 : -1;
}
public static class MapWordCount extends Mapper<LongWritable, Text, Bean, NullWritable> {
Bean bean=new Bean();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
// Split the cleaned record once; the fields are space-separated in the requirement-1 output
String[] fields = value.toString().split("\\s+");
String first = fields[0];
String second = fields[1];
String three = fields[2];
String year = fields[3];
// Net profit = revenue - expenses, carried as an extra field of the Bean
int money = Integer.parseInt(second) - Integer.parseInt(three);
String moneyStr = String.valueOf(money);
bean.setAll(first,second,three,year,moneyStr);
context.write(bean,NullWritable.get());
}
}
public static class ReduceWordCount extends Reducer<Bean, NullWritable, Bean, NullWritable> {
@Override
protected void reduce(Bean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
context.write(key,NullWritable.get());
}
}
}
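Requirement 3 as written produces one globally sorted output. If the sorted data should also be split per year (combining requirements 2 and 3), a Bean-keyed partitioner could be added to Test03, along the lines of MRPartition in Test02. A minimal sketch, assuming it is added as another static inner class of Test03 and that org.apache.hadoop.mapreduce.Partitioner is imported:
public static class YearBeanPartition extends Partitioner<Bean, NullWritable> {
    @Override
    public int getPartition(Bean k2, NullWritable v2, int numReduceTasks) {
        // Same rule as requirement 2: 2019年 goes to reducer 0, everything else to reducer 1
        return "2019年".equals(k2.getYears()) ? 0 : 1;
    }
}
// and in run():
//   job.setPartitionerClass(YearBeanPartition.class);
//   job.setNumReduceTasks(2);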
Implementation code for requirement 4:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
/**
 * Field description: store name, revenue, expenses, year
 * Requirement 4: separate the fields written to the final output file with '\t',
 * and append two extra fields: the net profit and a profit/loss flag
 * For example:
 * 王府井店 1900 600 2020年 1300 盈利
 * 劲松店 800 950 2020年 -150 亏损
 *
 */
public class Test04 extends Configured implements Tool {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
int status = ToolRunner.run(conf, new Test04(), args);
System.exit(status);
}
public int run(String[] args) throws Exception {
Job job = Job.getInstance(this.getConf(), "test26");
job.setJarByClass(Test04.class);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.setInputPaths(job, new Path("C:\\Users\\User\\Desktop\\outTest26"));
job.setMapperClass(MapWordCount.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
// job.setPartitionerClass(HashPartitioner.class);
// job.setSortComparatorClass(null);
// job.setGroupingComparatorClass(null);
// job.setCombinerClass(null);
job.setReducerClass(ReduceWordCount.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
Path path = new Path("C:\\Users\\User\\Desktop\\outTest2604");
FileSystem fs = FileSystem.get(this.getConf());
if (fs.exists(path)) {
fs.delete(path, true);
}
TextOutputFormat.setOutputPath(job, path);
//job.setNumReduceTasks(1);
return job.waitForCompletion(true) ? 0 : -1;
}
public static class MapWordCount extends Mapper<LongWritable, Text, Text, NullWritable> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
context.write(value,NullWritable.get());
}
}
public static class ReduceWordCount extends Reducer<Text, NullWritable, Text, NullWritable> {
Text outputKey = new Text();
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
// Re-split the cleaned record and compute the net profit
String[] fields = key.toString().split("\\s+");
int money = Integer.parseInt(fields[1]) - Integer.parseInt(fields[2]);
// Flag the record as profit or loss
String flag = money < 0 ? "亏损" : "盈利";
// Requirement 4: every field in the output line is separated by '\t'
outputKey.set(fields[0] + "\t" + fields[1] + "\t" + fields[2] + "\t" + fields[3] + "\t" + money + "\t" + flag);
context.write(outputKey,NullWritable.get());
}
}
}
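All four jobs hardcode local Windows paths, so they only run as-is on the machine they were written on. Since every driver already goes through ToolRunner, a small change lets the input and output paths come from the command line instead. A hedged sketch of what Test01's run() could look like with that change (the other jobs would follow the same pattern); the launch command is an assumption, e.g. hadoop jar test.jar Test01 <input> <output>:
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(this.getConf(), "test26");
    job.setJarByClass(Test01.class);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, new Path(args[0])); // input path from the command line
    job.setMapperClass(MapWordCount.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    Path out = new Path(args[1]); // output path from the command line
    FileSystem fs = FileSystem.get(this.getConf());
    if (fs.exists(out)) {
        fs.delete(out, true); // remove the output of a previous run
    }
    TextOutputFormat.setOutputPath(job, out);
    return job.waitForCompletion(true) ? 0 : -1;
}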