元数据:(商品:价格)
维护 23
降低实际上低价低价 12
合适的话 8328
哈市 83299
留下 10
留下 9
问题描述:第一步筛选出价格小于10000的数据
第二步筛选出价格小于100的
第三步合并相同商品的价格
第四步过滤掉商品名称长度大于2的
代码实现(mr的串联)
package chainmapper;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.chain.ChainMapper;
import org.apache.hadoop.mapreduce.lib.chain.ChainReducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
* mr串联测试
* @author 韩利鹏
*
*/
/**
 * Chained MapReduce job over "product price" records.
 *
 * Pipeline (per the problem statement):
 *   map1   keep records with price &lt; 10000
 *   map2   keep records with price &lt; 100
 *   reduce sum prices per product
 *   map3   keep products whose name is at most 2 characters long
 *
 * map1 and map2 run in the map task via ChainMapper; map3 runs AFTER the
 * reducer via ChainReducer.addMapper (the original code wrongly used
 * ChainMapper.addMapper there, which appends to the map chain instead).
 */
public class ChainMapperReduer {

	/**
	 * Stage 1: keep only records whose price is strictly less than 10000.
	 * Expected input line format: "&lt;product&gt; &lt;price&gt;" (whitespace separated).
	 */
	public static class CMRMap1 extends Mapper<LongWritable, Text, Text, Text> {
		// Reused across map() calls to avoid per-record allocation.
		private final Text outKey = new Text();
		private final Text outVal = new Text();

		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			// Split on any whitespace run so doubled spaces don't produce empty tokens.
			String[] fields = value.toString().trim().split("\\s+");
			if (fields.length < 2) {
				return; // malformed line — skip instead of crashing the job
			}
			final int money;
			try {
				money = Integer.parseInt(fields[1]);
			} catch (NumberFormatException ignored) {
				return; // non-numeric price — skip the record
			}
			// Spec says "price < 10000"; the original `money > 10000` wrongly kept 10000 itself.
			if (money >= 10000) {
				return;
			}
			outKey.set(fields[0]);
			outVal.set(fields[1]);
			context.write(outKey, outVal);
		}
	}

	/**
	 * Stage 2: keep only records whose price is strictly less than 100.
	 * Receives the (product, price) pairs emitted by {@link CMRMap1}.
	 */
	public static class CMRMap2 extends Mapper<Text, Text, Text, Text> {
		@Override
		protected void map(Text key, Text value, Mapper<Text, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			final int money;
			try {
				money = Integer.parseInt(value.toString());
			} catch (NumberFormatException ignored) {
				return; // defensive — upstream already validated, but stay crash-free
			}
			// Spec says "price < 100"; the original `money > 100` wrongly kept 100 itself.
			if (money >= 100) {
				return;
			}
			context.write(key, value);
		}
	}

	/**
	 * Reduce stage: sum all prices for the same product into one total.
	 */
	public static class CMRReduce extends Reducer<Text, Text, Text, Text> {
		// Reused output buffer.
		private final Text outVal = new Text();

		@Override
		protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			// long accumulator guards against int overflow when many prices are summed.
			long sum = 0;
			for (Text price : values) {
				sum += Long.parseLong(price.toString());
			}
			outVal.set(String.valueOf(sum));
			context.write(key, outVal);
		}
	}

	/**
	 * Stage 4 (runs after the reducer): keep only products whose trimmed
	 * name is at most 2 characters long.
	 */
	public static class CMRMap3 extends Mapper<Text, Text, Text, Text> {
		@Override
		protected void map(Text key, Text value, Mapper<Text, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			if (key.toString().trim().length() > 2) {
				return; // name too long — drop the record
			}
			context.write(key, value);
		}
	}

	/**
	 * Configures and submits the chained job.
	 *
	 * @param args unused
	 * @throws Exception if job submission or execution fails
	 */
	public static void main(String[] args) throws Exception {
		Configuration config = new Configuration();
		// NOTE(review): cluster address and paths are hard-coded — consider taking them from args.
		config.set("fs.defaultFS", "hdfs://192.168.10.97:9000/");
		Job job = Job.getInstance(config);
		job.setJarByClass(ChainMapperReduer.class);

		// Each chain element gets its own empty Configuration (loadDefaults=false);
		// the original inconsistently passed `true` for map3.
		ChainMapper.addMapper(job, CMRMap1.class, LongWritable.class, Text.class,
				Text.class, Text.class, new Configuration(false));
		ChainMapper.addMapper(job, CMRMap2.class, Text.class, Text.class,
				Text.class, Text.class, new Configuration(false));
		ChainReducer.setReducer(job, CMRReduce.class, Text.class, Text.class,
				Text.class, Text.class, new Configuration(false));
		// A mapper that must run AFTER the reduce belongs to the reducer's chain:
		// ChainReducer.addMapper, not ChainMapper.addMapper (which feeds the map chain).
		ChainReducer.addMapper(job, CMRMap3.class, Text.class, Text.class,
				Text.class, Text.class, new Configuration(false));

		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);

		FileInputFormat.addInputPath(job, new Path("/han/file/ChainMapperReduce.txt"));
		FileOutputFormat.setOutputPath(job, new Path("/han/outfile/chainmapreduce"));
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}