初学Hadoop,就以此篇作为开始吧,记录下自己学习过程的点点滴滴。
环境描述:master和slaves在同一局域网,并且只有master可以连接外网,在master上同时部署一个web应用。
问题需求:在web程序里可以输出Map和Reduce的日志信息。
具体解决思路及代码如下:
/**
 * Streams the Hadoop task log for the given job to the HTTP response.
 *
 * Looks up the task reports for {@code jobStr} (preferring Reduce tasks, falling
 * back to Map tasks), scrapes the JobTracker's taskdetails page for the "All"
 * log hyperlink, and relays that log — fetched by the master, which is the only
 * host with connectivity — to the client as UTF-8 text.
 *
 * @param model    Spring MVC model (unused here, kept for the handler signature)
 * @param jobStr   textual Hadoop job id, parseable by {@link JobID#forName}
 * @param response servlet response the log text is written to
 * @return always {@code null} (the response body is written directly)
 * @throws Exception on JobClient, parsing, or I/O failures
 */
@RequestMapping({"/task/log/"})
public String showTaskLog(Model model, String jobStr, HttpServletResponse response) throws Exception {
    JobID jobId = JobID.forName(jobStr);
    JobClient tracker = HadoopUtils.getJobClient();
    // Prefer the Reduce task reports; when the job has none, fall back to Map reports.
    TaskReport[] mtrs = tracker.getMapTaskReports(jobId);
    TaskReport[] rtrs = tracker.getReduceTaskReports(jobId);
    TaskReport[] trs = ArrayUtils.isEmpty(rtrs) ? mtrs : rtrs;
    // Explicit emptiness check instead of catching the ArrayIndexOutOfBounds
    // (the original try/catch swallowed every exception here).
    if (ArrayUtils.isEmpty(trs)) {
        return null;
    }
    String taskId = trs[trs.length - 1].getTaskID().toString();
    // task.log.url lives in a properties file:
    //   task.log.url=http://master:50030/taskdetails.jsp?tipid=%s
    // Parse that taskdetails page and collect all hyperlinks so we can find the
    // "All" log link, which points at the DataNode that actually ran the task.
    Parser parser = new Parser(String.format(ConstantUtils.getConstant("task.log.url"), taskId));
    NodeList nodeList = parser.extractAllNodesThatMatch(new NodeFilter() {
        private static final long serialVersionUID = -2388460449128537715L;
        @Override
        public boolean accept(Node n) {
            return n instanceof LinkTag;
        }
    });
    String logUrl = "";
    for (int i = 0; i < nodeList.size(); i++) {
        LinkTag tag = (LinkTag) nodeList.elementAt(i);
        if ("All".equalsIgnoreCase(tag.getStringText())) {
            logUrl = tag.extractLink();
            break;
        }
    }
    // No "All" link found: bail out rather than let new URL("") throw
    // MalformedURLException.
    if (logUrl.isEmpty()) {
        return null;
    }
    response.setCharacterEncoding("UTF-8");
    PrintWriter out = response.getWriter();
    URL url = new URL(logUrl);
    // try-with-resources guarantees the reader is closed even if relaying fails.
    // Read with an explicit UTF-8 charset (the platform default may differ), and
    // use readLine() == null as the EOF test: ready() only reports buffered data
    // and can return false mid-stream, silently truncating the log.
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(url.openStream(), "UTF-8"))) {
        String line;
        while ((line = reader.readLine()) != null) {
            out.println(line);
        }
    }
    // Flush but do not close: the servlet container owns the response writer.
    out.flush();
    return null;
}