// Map-side join:
// The product table is loaded into memory (setup), then each order record is
// joined with its product name before the map output is emitted — no reduce needed.
public class Cachemapper extends Mapper<LongWritable, Text, Text, NullWritable>{
Hashmap<String,String> pdmap = new Hashmap<>();
//1、商品表加载到内存
protected void setup(Context context) throws Exception{
//加载缓存文件
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream("pd.txt"),"UTF-8"));
String line;
while(StringUtils.isNotEmpty(line=br.readLine())){
String[] fields = line.split("\t");
pdMap.put(fields[0],fields[1]);
}
br.close();
}
//2、map
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptException{
//获取数据
String line = value.toString();
//切割
String[] fields = line.split("\t");
//获取订单中的商品id
String pid = fields[1];
//根据id获取商品名
String pname = pdMap.get(pid);
//拼接数据并输出
line = line +"\t"+ pname;
context.write(new Text(line), NullWritable.get());
}
}
public class FileDriver{
/**
 * Driver for the map-side join job: configures the mapper and its output
 * types, sets input/output paths, ships the product table through the
 * distributed cache, and runs a map-only job (zero reducers).
 *
 * @param args unused
 */
public static void main(String[] args)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    // Pass conf to the job — the original called getInstance() and ignored it.
    Job job = Job.getInstance(conf);
    job.setJarByClass(FileDriver.class);
    job.setMapperClass(Cachemapper.class);
    // Mapper emits <Text, NullWritable>: the key/value classes were swapped before.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    FileInputFormat.setInputPaths(job, new Path("C:/in"));
    FileOutputFormat.setOutputPath(job, new Path("c:/out"));
    // Ship the cached product table to every mapper.
    // URI.create avoids the checked URISyntaxException of new URI(...).
    job.addCacheFile(URI.create("file:///c:/inputcache/pd.txt"));
    // Map-only job: the join happens entirely on the map side (default is 1).
    job.setNumReduceTasks(0);
    // Propagate job success/failure as the process exit code.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
//Result files named part-r-00000 went through a reduce phase;
//part-m-00000 files are map-only output (no reduce ran).