1 . 找共同朋友,数据格式如下
第一个字母表示本人,其余是他的朋友;找出有共同朋友的人,以及共同朋友是谁。数据如下:
A B C D E F
B A C D E
C A B E
D A B E
E A B C D
F A
答案如下:
AB E:C:D AC E:B AD B:E AE C:B:D BC A:E BD A:E BE C:D:A BF A CD E:A:B CE A:B CF A DE B:A DF A EF A
程序代码:
import java.io.IOException; import java.util.Set; import java.util.StringTokenizer; import java.util.TreeSet; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.Mapper.Context; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser; public class FindFriend { public static class ChangeMapper extends Mapper<Object, Text, Text, Text> { @Override public void map(Object key, Text value, Context context) throws IOException, InterruptedException { StringTokenizer itr = new StringTokenizer(value.toString()); Text owner = new Text(); Set<String> set = new TreeSet<String>(); owner.set(itr.nextToken()); while (itr.hasMoreTokens()) { set.add(itr.nextToken()); } String[] friends = new String[set.size()]; friends = set.toArray(friends); for(int i=0; i<friends.length; i++) { for(int j=i+1; j<friends.length; j++) { String outputkey = friends[i]+friends[j]; context.write(new Text(outputkey),owner); } } } } public static class FindReducer extends Reducer<Text,Text,Text,Text> { public void reduce(Text key, Iterable<Text> values, Context context) throws IOException,InterruptedException { String commonfriends =""; for (Text val : values) { if(commonfriends == "") { commonfriends = val.toString(); } else { commonfriends = commonfriends+":"+val.toString(); } } context.write(key, new Text(commonfriends)); } } public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("args error"); System.exit(2); } Job job = new Job(conf, "FindFriend"); 
job.setJarByClass(FindFriend.class); job.setMapperClass(ChangeMapper.class); job.setCombinerClass(FindReducer.class); job.setReducerClass(FindReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); } }
2 . 随意使用各种类型的脚本语言实现:批量将/etc目录下的所有文件中的$HADOOP_HOME替换成/home/local/hadoop
# Replace the literal string $HADOOP_HOME with /home/local/hadoop in every
# regular file under /etc.
#  -type f : restrict to regular files — the original ran sed -i on
#            directories and special files too, producing errors.
#  '|' as the sed delimiter avoids escaping every '/' in the replacement.
#  '+' batches many files per sed invocation instead of forking one per file.
find /etc -type f -exec sed -i 's|\$HADOOP_HOME|/home/local/hadoop|g' {} +
3 . combine发生在哪个过程中?以及作用是什么?
map -> partition -> sort -> combine -> shuffle -> merge -> reduce(combiner在map端spill之前、按key排序之后执行)
combiner仅作用于单个Mapper任务,每个Map任务可能会产生大量的输出,combiner的作用就是在Map端对输出先做一次合并,以减少传输到Reducer的数据量。使用combiner实现本地key的聚合,提升速度,减轻io压力。
4 . 杀死一个job
先用 hadoop job -list 拿到 job-id,再执行 hadoop job -kill <job-id>
5 . hbase常用基本命令,创建表,添加记录,查看记录,删除记录
create '表名称','列族名称1','列族名称2','列族名称N'
put '表名','行名','列族:列名','值'
get '表名','行名'
delete '表名','行名称','列族:列名'