package searchResult;
/**
* map函数
* 输出键:搜索词...
* 输出值:对应搜索结果数...+搜索时间...
*/
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class SearchResultMap extends Mapper<Object, Text, Text, Text>{
public void map(Object key, Text value, Context context)
throws IOException, InterruptedException{
String line = value.toString();
String[] str= line.split("\t");
//判断条件:...
if (str[15].equals("XXX") && str[17].compareTo("-1")!=0){
context.write(new Text(str[17]),new Text(str[9]+"\t"+str[10]));
}
}
}
package searchResult;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
* reduce函数
* 输出键:搜索词...
* 输出键:对应搜索结果数...
*/
public class SearchResultReduce extends Reducer<Text,Text,Text,Text>{
public void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
String time = "20010101 00:00:00";
String result = "";
java.text.DateFormat df = new java.text.SimpleDateFormat("yyyyMMdd HH:mm:ss");//search_time格式
for (Text val : values){
String line2 = val.toString();
String[] str2 = line2.split("\t");
//时间类型转换
java.util.Calendar c1 = java.util.Calendar.getInstance();
java.util.Calendar c2 = java.util.Calendar.getInstance();
try{
c1.setTime(df.parse(time));
c2.setTime(df.parse(str2[1]));
}catch(java.text.ParseException e){
System.err.println("start_time格式不正确");
}
//判断条件
if(c1.compareTo(c2)<0){
time=str2[1];
result = str2[0];
}
}
context.write(key, new Text(result));
}
}
package searchResult;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import com.SearchProject;
import com.SearchProject.SearchProjectMapper;
import com.SearchProject.SearchProjectReducer;
/**
 * Command-line entry point that configures and submits the MapReduce job.
 *
 * <p>Usage: {@code <in> <out>}, optionally preceded by Hadoop generic options.
 * An existing output path is deleted recursively before the job is submitted.
 */
public class SearchResultDriver {

    /**
     * Parses the arguments, configures the job and exits with the job's status.
     * Exits with 2 on wrong usage, 0 when the job succeeds and 1 when it fails.
     *
     * @param args Hadoop generic options followed by the input path and the output path
     * @throws Exception if job setup, file system access or job execution fails
     */
    public static void main(String[] args) throws Exception, InterruptedException {
        Configuration conf = new Configuration();
        // Strip Hadoop generic options (e.g. -D key=value); only <in> <out> should remain.
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: search project <in> <out>");
            System.exit(2);
        }

        // Use the parsed arguments, not the raw ones: with generic options present,
        // args[0]/args[1] would point at the options rather than the paths.
        Path inPath = new Path(otherArgs[0]);
        Path outPath = new Path(otherArgs[1]);

        Job job = new Job(conf, "searchResult:" + otherArgs[1]);
        // NOTE(review): the jar, mapper and reducer classes below come from com.SearchProject,
        // while this package defines SearchResultMap and SearchResultReduce. Confirm which
        // classes this driver is meant to run.
        job.setJarByClass(SearchProject.class);
        job.setMapperClass(SearchProjectMapper.class);   // mapper
        job.setReducerClass(SearchProjectReducer.class); // reducer

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // Remove a pre-existing output directory so the job can write to it.
        FileSystem hdfs = outPath.getFileSystem(conf);
        if (hdfs.exists(outPath)) {
            hdfs.delete(outPath, true);
        }

        FileInputFormat.addInputPath(job, inPath);     // input HDFS path
        FileOutputFormat.setOutputPath(job, outPath);  // output HDFS path

        // Block until the job finishes and propagate success/failure as the exit code.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
PS:已做部分修改