mapreduce运行wordcount

最新推荐文章于 2023-04-03 20:14:00 发布

WEI_69

最新推荐文章于 2023-04-03 20:14:00 发布

阅读量175

点赞数

分类专栏： hadoop

本文链接：https://blog.csdn.net/qq_42304949/article/details/101707123

版权

hadoop 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

环境

阿里轻量应用服务器
hadoop3.2.0
java1.8

主程序

//package WordCount;       
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
 
/**
 * Hadoop MapReduce job that counts how often each whitespace-separated token
 * occurs in the input files.
 *
 * <p>{@link TokenizerMapper} emits a {@code (token, 1)} pair per token and
 * {@link IntSumReducer} adds the counts up per token. The same reducer class is
 * also registered as the combiner.
 */
public class MyWordCount {
    public MyWordCount() {
    }

    /**
     * Configures and submits the word count job, then exits the JVM.
     *
     * <p>After generic Hadoop options have been stripped by
     * {@link GenericOptionsParser}, the remaining arguments are interpreted as
     * {@code <in> [<in>...] <out>}: every argument except the last is added as an
     * input path and the last one is the output path.
     *
     * <p>Exit status: {@code 2} if fewer than two arguments remain, {@code 0} if
     * the job completes successfully, {@code 1} otherwise.
     *
     * @param args command-line arguments, including optional generic Hadoop options
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: wordcount <in> [<in>...] <out>");
            System.exit(2);
        }

        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(MyWordCount.class);
        job.setMapperClass(MyWordCount.TokenizerMapper.class);
        // Summing is associative and commutative, so the reducer doubles as the combiner.
        job.setCombinerClass(MyWordCount.IntSumReducer.class);
        job.setReducerClass(MyWordCount.IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Every argument but the last is an input path; the last is the output path.
        int outputIndex = otherArgs.length - 1;
        for (int i = 0; i < outputIndex; ++i) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[outputIndex]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /**
     * Sums all counts received for a key and writes a single {@code (key, sum)} pair.
     */
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        /** Output value object, reused across {@link #reduce} calls. */
        private final IntWritable result = new IntWritable();

        public IntSumReducer() {
        }

        /**
         * Adds up every value associated with {@code key} and emits the total.
         *
         * @param key     the token being counted
         * @param values  partial counts for {@code key}
         * @param context sink that receives the {@code (key, total)} pair
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }

            this.result.set(sum);
            context.write(key, this.result);
        }
    }

    /**
     * Splits each input value into tokens and emits {@code (token, 1)} for every token.
     */
    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
        /** Constant count of one emitted for every token. */
        private static final IntWritable one = new IntWritable(1);
        /** Output key object, reused for every emitted token. */
        private final Text word = new Text();

        public TokenizerMapper() {
        }

        /**
         * Tokenizes {@code value} with {@link StringTokenizer}'s default delimiters
         * and writes one {@code (token, 1)} pair per token.
         *
         * @param key     input key; not used
         * @param value   the text to tokenize
         * @param context sink that receives the {@code (token, 1)} pairs
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(Object key, Text value, Mapper<Object, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());

            while (itr.hasMoreTokens()) {
                this.word.set(itr.nextToken());
                context.write(this.word, one);
            }
        }
    }
}

配置 CLASSPATH

export CLASSPATH=/usr/local/java8/lib:\
/usr/local/hadoop-3.2.0/share/hadoop/common/hadoop-common-3.2.0.jar:\
/usr/local/hadoop-3.2.0/share/hadoop/mapreduce/hadoop-mapreduce-client-core-3.2.0.jar:\
/usr/local/hadoop-3.2.0/share/hadoop/common/lib/commons-cli-1.2.jar:\
$CLASSPATH

修改配置文件

mapred-site.xml

  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

        <property>
                <name>mapred.job.tracker</name>
                <value>wei123:9001</value>
        </property>
        <property>
                <name>mapreduce.framework.name</name>
                <value>yarn</value>
        </property>
        <property>
                <name>mapreduce.jobhistory.address</name>
                <value>wei123:10020</value>
        </property>
        <property>
                <name>mapreduce.jobhistory.webapp.address</name>
                <value>wei123:19888</value>
        </property>
        <property>
  			<name>yarn.app.mapreduce.am.env</name>
  			<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
		</property>
		<property>
		  <name>mapreduce.map.env</name>
		  <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
		</property>
		<property>
		  <name>mapreduce.reduce.env</name>
		  <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
		</property>

</configuration>

yarn-site.xml

<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>

<!-- Site specific YARN configuration properties -->
<!--NodeManager获取数据的方式是shuffle-->
        <property>
                <name>yarn.nodemanager.aux-services</name>
                <value>mapreduce_shuffle</value>
        </property>
<!--指定Yarn的老大(ResourceManager)的地址-->
   <property>
                <name>yarn.resourcemanager.address</name>
                <value>wei123:18040</value>
         </property>

</configuration>

运行

start-dfs.sh    # 启动HDFS
start-yarn.sh    # 启动yarn
javac MyWordCount.java   # 编译
jar -cvf wordcount1.jar *.class			# jar打包
hadoop jar wordcount1.jar MyWordCount input output   # 运行
hdfs dfs -cat output/*           # 查看结果

jps查看进程

23539 ResourceManager
24005 Jps
23653 NodeManager
14743 NameNode
15063 SecondaryNameNode
14859 DataNode

分词结果

简单文本

2019.9.27	1
5577	1
Day	1
Festival	1
National	1
big	1
data	1
dell	1
insprion	1
wanling	1
wei	1

复杂文本Run through the rain

WEI_69

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
mapreduce运行wordcount

环境阿里轻量应用服务器hadoop3.2.0java1.8主程序//package WordCount; import java.io.IOException;import java.util.Iterator;import java.util.StringTokenizer;import org.apache.hadoop.conf.Configuration;...
复制链接

扫一扫

专栏目录