数据:
index1:
I love Beijing and I love China
I love Jinan I love
I love Taian
index2:
Beijing is Beijing is the capital of China
Jinan is the capital city of Shandong
I am am I
index3:
a city in eastern China
the capital of Shandong province
population 2,726,400
I am
Output:
2,726,400 index3:1;
Beijing index2:2;index1:1;
China index1:1;index2:1;index3:1;
I index1:5;index2:2;index3:1;
Jinan index1:1;index2:1;
Shandong index2:1;index3:1;
Taian index1:1;
a index3:1;
am index2:2;index3:1;
and index1:1;
capital index2:2;index3:1;
city index2:1;index3:1;
eastern index3:1;
in index3:1;
is index2:3;
love index1:5;
of index2:2;index3:1;
population index3:1;
province index3:1;
the index2:2;index3:1;
代码:
package MapReducer05;
import org. apache. hadoop. conf. Configuration;
import org. apache. hadoop. fs. Path;
import org. apache. hadoop. io. *;
import org. apache. hadoop. mapreduce. Job;
import org. apache. hadoop. mapreduce. Mapper;
import org. apache. hadoop. mapreduce. Reducer;
import org. apache. hadoop. mapreduce. lib. input. FileInputFormat;
import org. apache. hadoop. mapreduce. lib. input. FileSplit;
import org. apache. hadoop. mapreduce. lib. output. FileOutputFormat;
import org. apache. log4j. BasicConfigurator;
import java. io. DataInput;
import java. io. DataOutput;
import java. io. IOException;
import java. util. Map;
import java. util. Set;
import java. util. TreeMap;
import java. util. TreeSet;
/**
 * Inverted-index job: for every word, lists the input files that contain it together with the
 * number of occurrences in each file, highest count first.
 *
 * <p>Data flow:
 * <ul>
 *   <li>{@link MyMapper} emits {@code (word, "file:1")} for every non-empty token;</li>
 *   <li>{@link MyCombiner} sums the counts per file and emits {@code (word, "file:count")}
 *       under the <em>same</em> key;</li>
 *   <li>{@link MyReducer} sums the counts per file again, ranks the files and writes
 *       {@code word -> "file:count;file:count;..."}.</li>
 * </ul>
 *
 * <p>The word is the key from the very first stage and the combiner and reducer perform the
 * same per-file summation. The result is therefore the same whether the framework runs the
 * combiner zero, one or several times, and all postings of a word reach the same reducer
 * regardless of how many reducers are configured.
 */
public class WordCountJob {

    /** Separates the file name from the count inside a posting value ({@code "file:count"}). */
    static final String POSTING_SEPARATOR = ":";

    /** Input path used when no command-line argument is given. */
    private static final String DEFAULT_INPUT = "C:\\Users\\Chen\\Desktop\\input\\dpsy\\*";

    /** Output path used when fewer than two command-line arguments are given. */
    private static final String DEFAULT_OUTPUT = "C:\\Users\\Chen\\Desktop\\12";

    /**
     * Parses {@code "file:count"} postings and adds up the counts of each file.
     *
     * <p>The last separator in a value is taken as the boundary, so a file name that itself
     * contains the separator is still parsed correctly.
     *
     * @param postings values of the form {@code "file:count"}
     * @return total count per file name, ordered by file name
     * @throws IOException if a value has no separator or its count is not an integer
     */
    static Map<String, Integer> sumPerFile(Iterable<Text> postings) throws IOException {
        Map<String, Integer> totals = new TreeMap<String, Integer>();
        for (Text posting : postings) {
            String raw = posting.toString();
            int boundary = raw.lastIndexOf(POSTING_SEPARATOR);
            if (boundary < 0) {
                throw new IOException("Malformed posting (expected file:count): " + raw);
            }
            String file = raw.substring(0, boundary);
            int count;
            try {
                count = Integer.parseInt(raw.substring(boundary + POSTING_SEPARATOR.length()));
            } catch (NumberFormatException e) {
                throw new IOException("Malformed count in posting: " + raw, e);
            }
            Integer soFar = totals.get(file);
            totals.put(file, soFar == null ? count : soFar + count);
        }
        return totals;
    }

    /**
     * Splits each input line on single spaces and emits {@code (word, "file:1")} per token,
     * where {@code file} is the name of the file the current split belongs to.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

        // Reused across records, following the Text-reuse pattern of the Hadoop WordCount example.
        private final Text word = new Text();
        private final Text posting = new Text();

        /**
         * @param key     position key supplied by the input format (unused)
         * @param value   one line of input text
         * @param context used to look up the input split and to emit the pairs
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            FileSplit split = (FileSplit) context.getInputSplit();
            posting.set(split.getPath().getName() + POSTING_SEPARATOR + "1");

            for (String token : value.toString().split(" ")) {
                // Blank lines and repeated spaces produce empty tokens; they are not words.
                if (token.isEmpty()) {
                    continue;
                }
                word.set(token);
                context.write(word, posting);
            }
        }
    }

    /**
     * Merges all postings of one word into a single line, ordered by {@link MyWritable}'s
     * natural order (highest count first, then file name).
     */
    public static class MyReducer extends Reducer<Text, Text, Text, Text> {

        /** Scratch set holding the ranked postings of the word currently being reduced. */
        Set<MyWritable> set = new TreeSet<MyWritable>();

        /**
         * @param key     the word
         * @param values  postings of the form {@code "file:count"}; a file may occur more than
         *                once, its counts are added up
         * @param context used to emit {@code (word, "file:count;file:count;...")}
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Clear up front so nothing left over from an earlier call can leak into this word.
            set.clear();
            for (Map.Entry<String, Integer> total : sumPerFile(values).entrySet()) {
                set.add(new MyWritable(total.getKey(), total.getValue()));
            }

            StringBuilder line = new StringBuilder();
            for (MyWritable ranked : set) {
                line.append(ranked);
            }
            set.clear();
            context.write(key, new Text(line.toString()));
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args optional: {@code args[0]} is the input path and {@code args[1]} the output
     *             path; when omitted, the built-in default paths are used
     */
    public static void main(String[] args)
            throws InterruptedException, IOException, ClassNotFoundException {
        String input = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String output = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        Configuration conf = new Configuration();
        BasicConfigurator.configure();

        Job job = Job.getInstance(conf, "mr");
        job.setJarByClass(WordCountJob.class);

        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setCombinerClass(MyCombiner.class);

        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

/**
 * Map-side pre-aggregation: collapses the postings of one word into one
 * {@code "file:count"} value per file.
 *
 * <p>The key is passed through unchanged and the output has the same format as the input, so
 * the result of the job does not depend on whether, or how often, this class is applied.
 */
class MyCombiner extends Reducer<Text, Text, Text, Text> {

    private final Text merged = new Text();

    /**
     * @param key     the word
     * @param values  postings of the form {@code "file:count"}
     * @param context used to emit one {@code (word, "file:count")} pair per file
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        for (Map.Entry<String, Integer> total : WordCountJob.sumPerFile(values).entrySet()) {
            merged.set(total.getKey() + WordCountJob.POSTING_SEPARATOR + total.getValue());
            context.write(key, merged);
        }
    }
}
class MyWritable implements WritableComparable < MyWritable> {
private String fname;
private int count;
public MyWritable ( ) { }
public MyWritable ( String fname, int count) {
this . fname = fname;
this . count = count;
}
public int compareTo ( MyWritable o) {
int ff = o. count- this . count;
if ( ff== 0 ) {
return this . fname. compareTo ( o. fname) ;
} else {
return ff;
}
}
@Override
public String toString ( ) {
return this . fname+ ":" + this . count+ ";" ;
}
public void write ( DataOutput out) throws IOException {
out. writeUTF ( fname) ;
out. writeInt ( count) ;
}
public void readFields ( DataInput in) throws IOException {
this . fname = in. readUTF ( ) ;
this . count = in. readInt ( ) ;
}
public String getFname ( ) {
return fname;
}
public void setFname ( String fname) {
this . fname = fname;
}
public int getCount ( ) {
return count;
}
public void setCount ( int count) {
this . count = count;
}
}