cat wordcount.txt
a
b
a
c
d
f
g
d
d
g
f
e
b
a
c
e
a
/usr/local/hadoop/bin/hadoop fs -put ~/wordcount.txt phpinput
# Submit the Hadoop Streaming job that uses the PHP scripts as mapper and
# reducer, reading from `phpinput` and writing to `phpoutput/wordcount`.
# The trailing backslashes join all options into ONE command; without them
# the shell would try to run each line (`jar ...`, `-mapper ...`) separately.
# NOTE(review): `bin/` and `contrib/` are relative paths, so this presumably
# has to be run from the Hadoop installation directory -- confirm.
bin/hadoop \
  jar contrib/streaming/hadoop-streaming-1.0.4.jar \
  -mapper /data/www/hbase.6.cn/svnwww/map.php \
  -reducer /data/www/hbase.6.cn/svnwww/reduce.php \
  -input phpinput \
  -output phpoutput/wordcount
packageJobJar: [/data/hadoop/tmp/hadoop-unjar8802229556384958300/] [] /tmp/streamjob9208716827641064676.jar tmpDir=null
15/09/15 16:19:08 WARN snappy.LoadSnappy: Snappy native library is available
15/09/15 16:19:08 INFO util.NativeCodeLoader: Loaded the native-hadoop library
15/09/15 16:19:08 INFO snappy.LoadSnappy: Snappy native library loaded
15/09/15 16:19:08 INFO mapred.FileInputFormat: Total input paths to process : 1
15/09/15 16:19:08 INFO streaming.StreamJob: getLocalDirs(): [/data/hadoop/tmp//mapred/local]
15/09/15 16:19:08 INFO streaming.StreamJob: Running job: job_201509141416_0021
15/09/15 16:19:08 INFO streaming.StreamJob: To kill this job, run:
15/09/15 16:19:08 INFO streaming.StreamJob: /usr/local/hadoop-1.0.4/libexec/../bin/hadoop job -Dmapred.job.tracker=hdfs://node1:9901 -kill job_201509141416_0021
15/09/15 16:19:08 INFO streaming.StreamJob: Tracking URL: http://node1:50030/jobdetails.jsp?jobid=job_201509141416_0021
15/09/15 16:19:09 INFO streaming.StreamJob: map 0% reduce 0%
15/09/15 16:19:25 INFO streaming.StreamJob: map 100% reduce 0%
15/09/15 16:19:36 INFO streaming.StreamJob: map 100% reduce 100%
15/09/15 16:19:42 INFO streaming.StreamJob: Job complete: job_201509141416_0021
15/09/15 16:19:42 INFO streaming.StreamJob: Output: phpoutput/wordcount
/usr/local/hadoop/bin/hadoop fs -cat phpoutput/wordcount/part-00000
// Note: the PHP scripts emit many notices; they need to be modified to guard with isset()
a 4
b 2
c 2
d 3
e 2
f 2
g 2
map.php
#!/usr/local/php/bin/php
$count)
{
// tab-delimited
echo $word, chr(9), $count, PHP_EOL;
}
?>
reduce.php
#!/usr/local/php/bin/php
0)
{
$word2count[$word] = $count;
}
else
{
$word2count[$word] += $count;
}
}
// sort the words lexicographically
//
// this step is NOT required, we just do it so that our
// final output will look more like the official Hadoop
// word count examples
ksort($word2count);
// write the results to STDOUT (standard output)
foreach ($word2count as $word => $count) {
echo $word, chr(9), $count, PHP_EOL;
}
?>