1.准备测试数据并上传至HDFS中。
2.创建Maven项目,修改pom.xml配置文件,并添加log4j.properties(步骤省略)
3.自定义值的类型Student
4.编写Mapper模块
5.编写Reducer模块
6.编写JarUtil
package com.maidu.scorecount;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.jar.JarEntry;
import java.util.jar.JarOutputStream;

/**
 * Builds a jar archive from the directory that contains the classpath root of a given class.
 *
 * <p>The archive is written to the current working directory and is named after the
 * fully-qualified class name, e.g. {@code com.example.Main.jar}.
 *
 * @author yt
 * @since 2024-05-20
 */
public class JarUtil {

    /**
     * Archives the parent directory of the classpath root that {@code cls}'s class loader
     * reports for the empty resource name, and returns the name of the jar that was written.
     *
     * @param cls class whose name determines the jar name and whose class loader locates the input
     * @return the jar file name ({@code cls.getName() + ".jar"}), relative to the working directory
     * @throws IllegalStateException if the classpath root cannot be resolved to a local directory
     * @throws RuntimeException if reading the input or writing the jar fails
     */
    public static String jar(Class<?> cls) {
        String outputJar = cls.getName() + ".jar";

        URL root = cls.getClassLoader().getResource("");
        if (root == null) {
            throw new IllegalStateException(
                    "Cannot locate the classpath root for " + cls.getName());
        }
        System.out.println("input: " + root.getFile());

        // URL.getFile() keeps percent-encoding (a space stays "%20"), so go through the URI
        // to obtain a path that really exists on disk.
        File classesDir;
        try {
            classesDir = new File(root.toURI());
        } catch (URISyntaxException | IllegalArgumentException e) {
            throw new IllegalStateException(
                    "Classpath root is not a local directory: " + root, e);
        }

        // The archive is built from the directory one level above the classpath root.
        File inputDir = classesDir.getParentFile();
        if (inputDir == null) {
            throw new IllegalStateException(
                    "Classpath root has no parent directory: " + classesDir);
        }

        jar(inputDir.getPath(), outputJar);
        return outputJar;
    }

    /**
     * Writes everything under {@code inputFileName} into a new jar at {@code outputFileName}.
     *
     * @param inputFileName  file or directory to archive
     * @param outputFileName path of the jar to create (overwritten if it exists)
     * @throws RuntimeException wrapping any {@link IOException} raised while archiving
     */
    private static void jar(String inputFileName, String outputFileName) {
        File output = new File(outputFileName).getAbsoluteFile();
        try (FileOutputStream fileOut = new FileOutputStream(output);
             JarOutputStream out = new JarOutputStream(fileOut)) {
            jar(out, new File(inputFileName), "", output);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Recursively adds {@code f} to the archive. Directories contribute only their files;
     * no entry is written for a directory itself.
     *
     * @param out  open jar stream that receives the entries
     * @param f    file or directory to add
     * @param base entry name for {@code f} inside the jar ("" for the root directory)
     * @param skip file that must not be archived (the jar currently being written)
     * @throws IOException if a directory cannot be listed or a file cannot be read or written
     */
    private static void jar(JarOutputStream out, File f, String base, File skip)
            throws IOException {
        if (f.isDirectory()) {
            File[] children = f.listFiles();
            if (children == null) {
                // listFiles() returns null on an I/O error or when the directory is unreadable.
                throw new IOException("Cannot list directory: " + f);
            }
            String prefix = base.isEmpty() ? "" : base + "/";
            for (File child : children) {
                jar(out, child, prefix + child.getName(), skip);
            }
            return;
        }

        if (f.equals(skip)) {
            // Never read the archive that is being written into itself.
            return;
        }

        out.putNextEntry(new JarEntry(base));
        try (FileInputStream in = new FileInputStream(f)) {
            byte[] buffer = new byte[1024];
            int n;
            while ((n = in.read(buffer)) != -1) {
                out.write(buffer, 0, n);
            }
        }
        out.closeEntry();
    }
}
7.编写Driver模块,此类需要继承 Configured 并实现 Tool 接口。
最后直接运行主类ScoreCount.
(1)当前项目下生成jar文件
(2)控制台输出了日志信息
(3)查看最终结果: