Hadoop word count example

Configuration files under etc/hadoop

1. vi core-site.xml

<?xml version="1.0" encoding="UTF-8"?>

<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!--

  Licensed under the Apache License, Version 2.0 (the "License");

  you may not use this file except in compliance with the License.

  You may obtain a copy of the License at

 

    http://www.apache.org/licenses/LICENSE-2.0

 

  Unless required by applicable law or agreed to in writing, software

  distributed under the License is distributed on an "AS IS" BASIS,

  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  See the License for the specific language governing permissions and

  limitations under the License. See accompanying LICENSE file.

-->

 

<!-- Put site-specific property overrides in this file. -->

 

<configuration>

<property>

        <name>fs.default.name</name>

        <value>hdfs://localhost:9000</value>

</property>

</configuration>

2. vi mapred-site.xml

<?xml version="1.0"?>

<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!--

  Licensed under the Apache License, Version 2.0 (the "License");

  you may not use this file except in compliance with the License.

  You may obtain a copy of the License at

 

    http://www.apache.org/licenses/LICENSE-2.0

 

  Unless required by applicable law or agreed to in writing, software

  distributed under the License is distributed on an "AS IS" BASIS,

  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  See the License for the specific language governing permissions and

  limitations under the License. See accompanying LICENSE file.

-->

 

<!-- Put site-specific property overrides in this file. -->

 

<configuration>

<property>

        <name>mapred.job.tracker</name>

        <value>localhost:9001</value>

</property>

 

</configuration>

3. vi hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>

<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!--

  Licensed under the Apache License, Version 2.0 (the "License");

  you may not use this file except in compliance with the License.

  You may obtain a copy of the License at

 

    http://www.apache.org/licenses/LICENSE-2.0

 

  Unless required by applicable law or agreed to in writing, software

  distributed under the License is distributed on an "AS IS" BASIS,

  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  See the License for the specific language governing permissions and

  limitations under the License. See accompanying LICENSE file.

-->

 

<!-- Put site-specific property overrides in this file. -->

 

<configuration>

<property>

        <name>dfs.replication</name>

        <value>1</value>

 </property>

 

 <property>

        <name>dfs.name.dir</name>

        <value>/home/hdfs/name</value>

 </property>

 <property>

        <name>dfs.data.dir</name>

        <value>/home/hdfs/data</value>

 </property>

 

</configuration>
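A hedged note not in the original steps: dfs.name.dir and dfs.data.dir above point at local filesystem directories; they should exist and be writable by the user that runs Hadoop, for example:

mkdir -p /home/hdfs/name /home/hdfs/data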

4. vi hadoop-env.sh

export JAVA_HOME=/opt/jdk1.8.0_65   # point Hadoop at the JDK installation
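If this is a brand-new setup, the NameNode must be formatted once before the daemons are started (a step not shown in the original notes; run it from the Hadoop install directory, and only on a fresh cluster, since it wipes existing HDFS metadata):

./bin/hdfs namenode -format   (on older releases: ./bin/hadoop namenode -format)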


5. Start Hadoop

The startup scripts are under sbin:

./start-all.sh 

5.1 jps shows the running daemons (there should be five):

15509 ResourceManager

14808 NameNode

15241 SecondaryNameNode

14974 DataNode

15679 NodeManager

6. Shut down: ./stop-all.sh

 

 

7. HDFS commands

hadoop fs -mkdir /user/trunk

hadoop fs -ls /user

hadoop fs -lsr /user   (recursive listing)

hadoop fs -put test.txt /user/trunk

hadoop fs -put test.txt .  (copy into the current HDFS directory; the user's home directory must exist first)

hadoop fs -get /user/trunk/test.txt . (copy to the current local directory)

hadoop fs -cat /user/trunk/test.txt

hadoop fs -tail /user/trunk/test.txt  (show the last kilobyte of the file)

hadoop fs -rm /user/trunk/test.txt

hadoop fs -help ls (show help for the ls command)
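To feed the word count job in the next section, the input has to be uploaded to HDFS first; a minimal sketch (the /user/trunk/input directory name is an assumption, any HDFS path works):

hadoop fs -mkdir /user/trunk/input
hadoop fs -put test.txt /user/trunk/input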

 

 

8. Java code

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordcountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split each input line on spaces and emit (word, 1) for every token.
        String[] words = value.toString().split(" ");
        for (String word : words) {
            context.write(new Text(word), new IntWritable(1));
        }
    }
}

 

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordcountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the counts collected for this word across all map outputs.
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        context.write(key, new IntWritable(sum));
    }
}
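A quick worked example of the data flow: for an input line "hello world hello", the mapper emits (hello, 1), (world, 1), (hello, 1); after the shuffle the reducer receives hello -> [1, 1] and world -> [1], and writes "hello 2" and "world 1" to the output.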

 

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "word count");

        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordcountMapper.class);
        // The reducer also serves as a combiner: summing partial counts is safe.
        job.setCombinerClass(WordcountReducer.class);
        job.setReducerClass(WordcountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // args[0] = HDFS input path, args[1] = HDFS output path (must not exist yet).
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

 

Dependency: hadoop-core-0.20.2.jar (needed on the compile classpath).
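A minimal compile-and-run sketch, assuming the three classes above sit in the default package, hadoop-core-0.20.2.jar is in the current directory, and the input was uploaded as in the HDFS section (the jar name wordcount.jar and the paths are assumptions):

mkdir classes
javac -classpath hadoop-core-0.20.2.jar -d classes WordCount.java WordcountMapper.java WordcountReducer.java
jar cvf wordcount.jar -C classes .
hadoop jar wordcount.jar WordCount /user/trunk/input /user/trunk/output
hadoop fs -cat /user/trunk/output/part-r-00000

Note that the output directory must not exist before the job is submitted, otherwise the job fails immediately.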
