1、下载hadoop-2.6.0.tar.gz包到本地
我解压后放入D:\hadoop-2.6.0-cdh5.9.3\
https://github.com/steveloughran/winutils.git
选择相应的版本
下载hadoop.dll、winutils.exe文件放入D:\hadoop-2.6.0-cdh5.9.3\hadoop-2.6.0\bin
配置环境变量
HADOOP_HOME=D:\hadoop-2.6.0-cdh5.9.3\hadoop-2.6.0
HADOOP_BIN_PATH=%HADOOP_HOME%\bin
HADOOP_PREFIX=D:\hadoop-2.6.0-cdh5.9.3\hadoop-2.6.0
PATH后增加;%HADOOP_HOME%\bin
基于官方 WordCount 示例修改:增加了"如果输出目录已存在则先删除"的逻辑,并打印 JVM/系统参数以便排查本地环境问题。
1 importorg.apache.hadoop.conf.Configuration;2 importorg.apache.hadoop.fs.FileSystem;3 importorg.apache.hadoop.fs.Path;4 importorg.apache.hadoop.io.IntWritable;5 importorg.apache.hadoop.io.Text;6 importorg.apache.hadoop.mapreduce.Job;7 importorg.apache.hadoop.mapreduce.Mapper;8 importorg.apache.hadoop.mapreduce.Reducer;9 importorg.apache.hadoop.mapreduce.lib.input.FileInputFormat;10 importorg.apache.hadoop.mapreduce.lib.output.FileOutputFormat;11 importorg.apache.hadoop.util.GenericOptionsParser;12
13 importjava.io.IOException;14 importjava.util.Properties;15 importjava.util.StringTokenizer;16
17 public classWordCount {18
19 public static classTokenizerMapper20 extends Mapper{21
22 private final static IntWritable one = new IntWritable(1);23 private Text word = newText();24
25 public void map(Object key, Text value, Context context) throwsIOException, InterruptedException {26 StringTokenizer itr = newStringTokenizer(value.toString());27 while(itr.hasMoreTokens()) {28 word.set(itr.nextToken());29 context.write(word, one);30 }31 }32 }33
34 public static class IntSumReducer extends Reducer{35 private IntWritable result = newIntWritable();36
37 public void reduce(Text key, Iterable values, Context context) throwsIOException, InterruptedException {38 int sum = 0;39 for(IntWritable val : values) {40 sum +=val.get();41 }42 result.set(sum);43 context.write(key, result);44 }45 }46
47
48 /**
49 * 删除指定目录50 *51 *@paramconf52 *@paramdirPath53 *@throwsIOException54 */
55 private static void deleteDir(Configuration conf, String dirPath) throwsIOException {56 FileSystem fs =FileSystem.get(conf);57 Path targetPath = newPath(dirPath);58 if(fs.exists(targetPath)) {59 boolean delResult = fs.delete(targetPath, true);60 if(delResult) {61 System.out.println(targetPath + " has been deleted sucessfullly.");62 } else{63 System.out.println(targetPath + " deletion failed.");64 }65 }66
67 }68
69 public static void main(String[] args) throwsException {70 Properties props = System.getProperties(); //系统属性
71 System.out.println("Java的运行环境版本:" + props.getProperty("java.version"));72 System.out.println("Java的运行环境供应商:" + props.getProperty("java.vendor"));73 System.out.println("Java供应商的URL:" + props.getProperty("java.vendor.url"));74 System.out.println("Java的安装路径:" + props.getProperty("java.home"));75 System.out.println("Java的虚拟机规范版本:" + props.getProperty("java.vm.specification.version"));76 System.out.println("Java的虚拟机规范供应商:" + props.getProperty("java.vm.specification.vendor"));77 System.out.println("Java的虚拟机规范名称:" + props.getProperty("java.vm.specification.name"));78 System.out.println("Java的虚拟机实现版本:" + props.getProperty("java.vm.version"));79 System.out.println("Java的虚拟机实现供应商:" + props.getProperty("java.vm.vendor"));80 System.out.println("Java的虚拟机实现名称:" + props.getProperty("java.vm.name"));81 System.out.println("Java运行时环境规范版本:" + props.getProperty("java.specification.version"));82 System.out.println("Java运行时环境规范供应商:" + props.getProperty("java.specification.vender"));83 System.out.println("Java运行时环境规范名称:" + props.getProperty("java.specification.name"));84 System.out.println("Java的类格式版本号:" + props.getProperty("java.class.version"));85 String jars = props.getProperty("java.class.path");86 String[] split = jars.split(";", -1);87 for(String jar : split) {88 System.out.println("Java的类路径jar: " +jar);89 }90 //System.out.println("Java的类路径:" + props.getProperty("java.class.path"));
91 String paths = props.getProperty("java.library.path");92 String[] pathsSplit = paths.split(";", -1);93 for(String path : pathsSplit) {94 System.out.println("加载库时搜索的路径列表:" +path);95 }96 //System.out.println("加载库时搜索的路径列表:" + props.getProperty("java.library.path"));
97 System.out.println("默认的临时文件路径:" + props.getProperty("java.io.tmpdir"));98 System.out.println("一个或多个扩展目录的路径:" + props.getProperty("java.ext.dirs"));99 System.out.println("操作系统的名称:" + props.getProperty("os.name"));100 System.out.println("操作系统的构架:" + props.getProperty("os.arch"));101 System.out.println("操作系统的版本:" + props.getProperty("os.version"));102 System.out.println("文件分隔符:" + props.getProperty("file.separator")); //在 unix 系统中是"/"
103 System.out.println("路径分隔符:" + props.getProperty("path.separator")); //在 unix 系统中是":"
104 System.out.println("行分隔符:" + props.getProperty("line.separator")); //在 unix 系统中是"/n"
105 System.out.println("用户的账户名称:" + props.getProperty("user.name"));106 System.out.println("用户的主目录:" + props.getProperty("user.home"));107 System.out.println("用户的当前工作目录:" + props.getProperty("user.dir"));108 Configuration conf = newConfiguration();109 String[] otherArgs = newGenericOptionsParser(conf, args).getRemainingArgs();110 if (otherArgs.length < 2) {111 System.err.println("Usage: wordcount [...] ");112 System.exit(2);113 }114
115 //先删除output目录
116 deleteDir(conf, otherArgs[otherArgs.length - 1]);117
118 Job job = Job.getInstance(conf, "word count");119 job.setJarByClass(WordCount.class);120 job.setMapperClass(TokenizerMapper.class);121 job.setCombinerClass(IntSumReducer.class);122 job.setReducerClass(IntSumReducer.class);123 job.setOutputKeyClass(Text.class);124 job.setOutputValueClass(IntWritable.class);125 for (int i = 0; i < otherArgs.length - 1; ++i) {126 FileInputFormat.addInputPath(job, newPath(otherArgs[i]));127 }128 FileOutputFormat.setOutputPath(job,129 new Path(otherArgs[otherArgs.length - 1]));130 System.exit(job.waitForCompletion(true) ? 0 : 1);131 }132 }
View Code
pom依赖
<!-- Maven dependencies for running WordCount locally (tags were stripped by extraction; restored here). -->
<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.6.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.6.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
        <version>2.6.0</version>
    </dependency>
</dependencies>
View Code
添加本地依赖
集群信息
hdfs-site.xml
core-site.xml
放入resources 文件
给出输入输出参数运行即可
确保本地配置文件(hdfs-site.xml、core-site.xml)和 Maven 依赖都被正确加载,然后即可运行。