// Map-side join:
// The product table is loaded into memory (setup), then each order record is
// joined with its product name before the map output is emitted — no reduce needed.
public class Cachemapper extends Mapper<LongWritable, Text, Text, NullWritable>{
Hashmap<String,String> pdmap = new Hashmap<>();
//1、商品表加载到内存
protected void setup(Context context) throws Exception{
//加载缓存文件
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream("pd.txt"),"UTF-8"));
String line;
while(StringUtils.isNotEmpty(line=br.readLine())){
String[] fields = line.split("\t");
pdMap.put(fields[0],fields[1]);
}
br.close();
}
//2、map
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptException{
//获取数据
String line = value.toString();
//切割
String[] fields = line.split("\t");
//获取订单中的商品id
String pid = fields[1];
//根据id获取商品名
String pname = pdMap.get(pid);
//拼接数据并输出
line = line +"\t"+ pname;
context.write(new Text(line), NullWritable.get());
}
}
public class FileDriver{
/**
 * Driver for the map-side join job: configures the mapper and its output
 * types, sets input/output paths, ships the product table through the
 * distributed cache, and runs a map-only job (zero reducers).
 *
 * @param args unused
 */
public static void main(String[] args)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    // Pass conf to the job — the original called getInstance() and ignored it.
    Job job = Job.getInstance(conf);
    job.setJarByClass(FileDriver.class);
    job.setMapperClass(Cachemapper.class);
    // Mapper emits <Text, NullWritable>: the key/value classes were swapped before.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    FileInputFormat.setInputPaths(job, new Path("C:/in"));
    FileOutputFormat.setOutputPath(job, new Path("c:/out"));
    // Ship the cached product table to every mapper.
    // URI.create avoids the checked URISyntaxException of new URI(...).
    job.addCacheFile(URI.create("file:///c:/inputcache/pd.txt"));
    // Map-only job: the join happens entirely on the map side (default is 1).
    job.setNumReduceTasks(0);
    // Propagate job success/failure as the process exit code.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
//Result files named part-r-00000 went through a reduce phase;
//part-m-00000 files are map-only output (no reduce ran).