job.addArchiveToClassPath(archive); // 缓存jar包到task运行节点的classpath中
job.addFileToClassPath(file); // 缓存普通文件到task运行节点的classpath中
job.addCacheArchive(uri); // 缓存压缩包文件到task运行节点的工作目录
job.addCacheFile(uri); // 缓存普通文件到task运行节点的工作目录
conf.set("mapreduce.framework.name", "local"); // 本地模式 (local mode)
job.addCacheFile(new URI("file:/D:/srcdata/mapjoincache/pdts.txt#pdts.txt")); //添加缓存
// 使用时 — 本地模式测试 (Usage: local-mode test)
URI[] cacheFiles = context.getCacheFiles();
Path cacheFile = new Path(cacheFiles[0]);
BufferedReader br = new BufferedReader(new FileReader(cacheFile.toUri().getPath()));
// 集群模式时 (Cluster / YARN mode)
conf.set("mapreduce.framework.name", "yarn"); // yarn模式 (YARN mode)
job.addCacheFile(new URI("hdfs://nameservice1/test/srcdata/mapjoincache/pdts.txt#pdts.txt"));//添加hdfs文件做缓存
/**
 * Loads the first distributed-cache file into {@code pdInfoMap} before any
 * map calls run. Each line is expected to be a comma-separated pair; the
 * first field becomes the key and the second the value (presumably product
 * id -> product info — TODO confirm against the producer of pdts.txt).
 *
 * @param context task context providing the cache-file URIs via getCacheFiles()
 * @throws IOException if the cache file cannot be opened or read
 * @throws InterruptedException declared by the Mapper.setup contract
 */
protected void setup(Context context) throws IOException, InterruptedException {
URI[] cacheFiles = context.getCacheFiles();
Path cacheFile = new Path(cacheFiles[0]);
System.out.println("分布式缓存"+cacheFile);
System.out.println("分布式缓存路径"+cacheFile.toUri().getPath());
// try-with-resources: the original leaked the reader whenever readLine()
// or the map insert threw before reaching br.close().
try (BufferedReader br = new BufferedReader(new FileReader(cacheFile.toUri().getPath()))) {
String line;
// BUG FIX: the original condition StringUtils.isNotEmpty(line = br.readLine())
// stopped at the first blank line, silently truncating the cache data.
// Read until EOF (readLine() returns null) and skip blank lines instead.
while ((line = br.readLine()) != null) {
if (line.isEmpty()) {
continue; // tolerate blank lines anywhere in the file
}
String[] fields = line.split(",");
if (fields.length >= 2) { // guard against malformed rows lacking a value column
pdInfoMap.put(fields[0], fields[1]);
}
}
}
}