import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * MapReduce job computing statistics over the global survey data behind the
 * article "Emerging ecosystems: Python vs. R — which suits big data
 * (Spark/Hadoop) and deep learning better?"
 * (https://mp.weixin.qq.com/s/3qQqN6qzQ3a8_Au2qfZnVg).
 */
22 public class wechat extends Configured implementsTool {23
24 /**
25 * Map方法26 */
27 private static class ModuleMapper extends Mapper{28 private static final IntWritable mapOutputValue = new IntWritable(1) ;29 private Text mapOutputKey = newText() ;30 @Override31 public voidmap(LongWritable key, Text value, Context context)32 throwsIOException, InterruptedException {33
34 String input =value.toString();35 if(input.split(",").length<16) {36 return;37 }38 String[] arrStr = input.split(",");39 //Python-大数据计数器输出
40 if("1".equals(arrStr[2])&&"1".equals(arrStr[14])) {41 context.getCounter("WECHAT_MAPPER_COUNTERS", "Python_BigData").increment(1L);42 }43 //Python-Deep计数器输出
44 if("1".equals(arrStr[2])&&"1".equals(arrStr[13])) {45 context.getCounter("WECHAT_MAPPER_COUNTERS", "Python_Deep-Learning").increment(1L);46 }47 //R-大数据计数器输出
48 if("1".equals(arrStr[3])&&"1".equals(arrStr[14])) {49 context.getCounter("WECHAT_MAPPER_COUNTERS", "R_BigData").increment(1L);50 }51 //R-深度计数器输出
52 if("1".equals(arrStr[3])&&"1".equals(arrStr[13])) {53 context.getCounter("WECHAT_MAPPER_COUNTERS", "R_Deep-Learning").increment(1L);54 }55
56 arrStr = input.split(",")[16].split(";");57 //遍历
58 for(String tool: arrStr){59 //设置key
60 mapOutputKey.set(tool);61 //输出
62 context.write(mapOutputKey, mapOutputValue) ;63 }64 }65 }66
67 /**
68 * Reduce聚合结果69 */
70 private static class ModuleReducer extends Reducer{71 private IntWritable outputValue = newIntWritable() ;72 @Override73 protected void reduce(Text key, Iterablevalues, Context context)74 throwsIOException, InterruptedException {75
76 //定义临时变量,用于累加
77 int sum = 0;78
79 //遍历
80 for(IntWritable value: values){81 sum +=value.get() ;82 }83
84 if(sum < 500){85 //定义500以上的筛选
86 return;87 }88 //设置
89 outputValue.set(sum) ;90 //输出
91 context.write(key, outputValue) ;92
93 }94 }95
96 /**
97 * 驱动创建Job并提交运行 返回状态码98 */
99
100 public int run(String[] args) throwsException {101 //创建一个Job
102 Job job =Job.getInstance(103 this.getConf() , wechat.class.getSimpleName()104 ) ;105 //设置job运行的class
106 job.setJarByClass(wechat.class);107
108 //设置Job109 //1. 设置 input,从哪里读取数据
110 Path inputPath = new Path(args[0]) ;111 FileInputFormat.addInputPath(job, inputPath);112
113 //2. 设置 mapper类
114 job.setMapperClass(ModuleMapper.class);115 //设置map 输出的key和value的数据类型
116 job.setMapOutputKeyClass(Text.class);117 job.setMapOutputValueClass(IntWritable.class);118
119 //3. 设置 reducer 类
120 job.setReducerClass(ModuleReducer.class);121 //设置 reducer 输出的key和value的数据类型
122 job.setOutputKeyClass(Text.class);123 job.setOutputValueClass(IntWritable.class);124 //设置ReduceTask个数125 //job.setNumReduceTasks(2);126
127 //4. 设置 处理结果保存的路径
128 Path outputPath = new Path(args[1]) ;129 FileOutputFormat.setOutputPath(job, outputPath);130
131 //提交job运行
132 boolean isSuccess = job.waitForCompletion(true) ;133
134 //返回状态
135 return isSuccess ? 0 : 1;136 }137
138 /**
139 *140 *@paramargs141 *@throwsException142 */
143 public static void main(String[] args) throwsException {144 if(2 >args.length){145 System.out.println("Usage: " + wechat.class.getSimpleName() +" ");146 return;147 }148
149 //读取HADOOP中配置文件, core-*.xml hdfs-*.xml yarn-*.xml mapred-*.xml
150 Configuration conf = newConfiguration() ;151
152 //运行Job
153 int status = ToolRunner.run(conf, newwechat(), args) ;154
155 //exit program
156 System.exit(status);157 }158
159 }