项目场景:
问题描述:
在MapReduce程序中,使用上传至HDFS的文件作为缓存文件时,传输路径运行报错:
/user/MR/input/information.txt
java.io.FileNotFoundException: \user\MR\input\information.txt (系统找不到指定的路径。)
mapper类:
package CSDN综合练习;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.HashMap;
public class Mapper extends org.apache.hadoop.mapreduce.Mapper<LongWritable, Text,Text, Bean> {
Bean bean = new Bean();
HashMap map = new HashMap();
/**
* 缓存hdfs上的数据表
* @param context
* @throws IOException
* @throws InterruptedException
*/
@Override
protected void setup(Context context) throws IOException, InterruptedException {
try {
// 获取缓存文件
URI[] cacheFiles = context.getCacheFiles();
// 通过缓存文件获取路径
String path = cacheFiles[0].getPath().toString();
System.out.println(path);
// 读取文件信息
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path),"UTF-8"));
String line;
while (StringUtils.isNotEmpty(line = br.readLine())){
// 游戏 大数据 1
// 读取一行数据,拆分
String[] fields = line.split("\t");
map.put(fields[2],fields[0]+"\t"+fields[1]);
}
// 关闭资源
IOUtils.closeStream(br);
} catch (Exception e){
e.printStackTrace();
}
}
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
// 1 张三 女
// 读取一行数据,进行拆分
String line = value.toString();
String[] fields = line.split("\t");
// 封装bean对象
String id = fields[0];
String name = fields[1];
String sex = fields[2];
String hobby = (String)map.get(id).toString().split("\t")[0];
String job = (String)map.get(id).toString().split("\t")[1];
bean.setId(id);
bean.setName(name);
bean.setSex(sex);
bean.setHobby(hobby);
bean.setJob(job);
// 写出
context.write(new Text(id),bean);
}
}
Driver类:
package CSDN综合练习;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Hdfs;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.net.URI;
public class Driver {
public static void main(String[] args) {
Job job;
Configuration conf = new Configuration();
try {
// 获取job
job = Job.getInstance(conf);
// 配置
job.setMapperClass(Mapper.class);
job.setReducerClass(Reduce.class);
job.setJarByClass(Driver.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Bean.class);
job.setOutputKeyClass(Bean.class);
job.setOutputValueClass(LongWritable.class);
// 配置缓存
String path = "hdfs://192.168.64.178:9000/user/MR/input/information.txt";
job.addCacheFile(new URI(path));
// 自定义分区
job.setPartitionerClass(Partition.class);
// reduce计算的数量
job.setNumReduceTasks(2);
// 配置输入输出文件
FileInputFormat.setInputPaths(job,new Path("G:/Projects/IdeaProject-C/MapReduce/src/main/java/CSDN综合练习/data/student.txt"));
FileOutputFormat.setOutputPath(job,new Path("G:/Projects/IdeaProject-C/MapReduce/src/main/java/CSDN综合练习/output_withoutReducer1"));
// 提交job
boolean result = job.waitForCompletion(true);
System.exit(result? 0:1);
} catch (Exception e){
e.printStackTrace();
}
}
}
解决方案:
MR程序不能够同时获取本地和hdfs上的文件信息,它的路径形式以题中来看,要么全在本地,要么全在虚拟机上。