HNU Big Data Parallel Processing System (2): MapReduce Tasks

1. Data Deduplication
First, install IDEA and the JDK; there are plenty of guides online for setting up the environment.
On the host machine, download the same Hadoop version as the one on the virtual machine and replace its bin directory; for details see the link below:
How to connect IDEA to Hadoop
Only the part about connecting to HDFS is needed; the post-connection test can be skipped.
Note: once connected, a circular refresh icon appears in the top-right corner of the editor pane; click it to import the dependencies (it shows up roughly where the figure indicated; I forgot to take a screenshot at the time).
Code reference
My project layout.
On the virtual machine, upload file1.txt and file2.txt; after the previous lab you should know how to create a directory and push files to the cluster.
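For example (a quick sketch, assuming the files sit in the current directory on the VM and go into the same /input1 directory the job reads below):

 hdfs dfs -mkdir -p /input1
 hdfs dfs -put file1.txt file2.txt /input1/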

Note that the code differs slightly from the reference: the input and output paths are hard-coded to the cluster address, like this:

 FileInputFormat.addInputPath(job, new Path("hdfs://master:9000/input1/"));
 FileOutputFormat.setOutputPath(job, new Path("hdfs://master:9000/output1/"));
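
The full deduplication code is in the linked reference; the core idea is simply to emit every whole line as a key with an empty value, so the shuffle merges duplicates and the reducer writes each distinct line once. A minimal sketch of that idea (the package and class names are my own placeholders; only the paths come from the snippet above):

package lab1.task1;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Dedup {

    // Map: emit the whole line as the key with an empty value,
    // so identical lines are grouped under one key during the shuffle.
    public static class DedupMapper extends Mapper<Object, Text, Text, Text> {
        private static final Text EMPTY = new Text("");
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            context.write(value, EMPTY);
        }
    }

    // Reduce: write each distinct line exactly once.
    public static class DedupReducer extends Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            context.write(key, new Text(""));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "data dedup");
        job.setJarByClass(Dedup.class);
        job.setMapperClass(DedupMapper.class);
        job.setCombinerClass(DedupReducer.class); // safe here: deduplication is idempotent
        job.setReducerClass(DedupReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path("hdfs://master:9000/input1/"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://master:9000/output1/"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}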

Run it, and an error appears:
Permission denied: user=root, access=WRITE, inode="/user":hdfs:supergroup:drwxr-xr-x
Add the following to hdfs-site.xml in the Hadoop configuration:

      <property>
        <name>dfs.permissions</name>
        <value>false</value>
      </property>

I forget whether it has to be added on the IDEA side or on the virtual machine (the permission check is enforced by the NameNode, so the copy on the cluster is the one that really matters); modifying both does no harm.
Now it works.
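If you would rather not disable permission checking, another common workaround (my alternative, not part of the original steps) is to make the client identify itself as a user that is allowed to write to the target directory, before the Configuration is created:

 // "hdfs" is an assumption -- use whichever account owns the target directory on your cluster
 System.setProperty("HADOOP_USER_NAME", "hdfs");
 Configuration conf = new Configuration();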
2. Data Sorting

Code reference
My project layout.
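The full sorting code is in the linked reference. The key point is that MapReduce already sorts keys during the shuffle, so it is enough to emit each number as an IntWritable key and have a single reducer number them in order. A minimal sketch of that idea (the class names, package, and input2/output2 paths are my assumptions, following the naming pattern of the other tasks):

package lab1.task2;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class DataSort {

    // Map: parse each line as an integer and emit it as the key;
    // the framework sorts the keys before they reach the reducer.
    public static class SortMapper extends Mapper<Object, Text, IntWritable, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (!line.isEmpty()) {
                context.write(new IntWritable(Integer.parseInt(line)), ONE);
            }
        }
    }

    // Reduce: keys arrive in ascending order, so a running counter gives the rank;
    // emit the rank once per occurrence of each number.
    public static class SortReducer extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
        private int rank = 1;
        public void reduce(IntWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            for (IntWritable ignored : values) {
                context.write(new IntWritable(rank), key);
                rank++;
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "data sort");
        job.setJarByClass(DataSort.class);
        job.setMapperClass(SortMapper.class);
        job.setReducerClass(SortReducer.class);
        job.setNumReduceTasks(1); // a single reducer keeps the ordering global
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path("hdfs://master:9000/input2/"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://master:9000/output2/"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}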
3. Average Score

package lab1.task3;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class calcGPA {
    public calcGPA() {
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        String fileAddress1 = "hdfs://master:9000/input3/";
        String fileAddress2 = "hdfs://master:9000/";
        //String[] otherArgs = (new GenericOptionsParser(conf, args)).getRemainingArgs();
        String[] otherArgs = new String[]{fileAddress1+"math.txt", fileAddress1+"china.txt", fileAddress1+"english.txt", fileAddress2+"output3"};
        if(otherArgs.length < 2) {
            System.err.println("Usage: calcGPA <in> [<in>...] <out>");
            System.exit(2);
        }

        Job job = Job.getInstance(conf, "calc GPA");
        job.setJarByClass(calcGPA.class);
        job.setMapperClass(calcGPA.TokenizerMapper.class);
        // No combiner: an average is not safe to pre-aggregate, because averaging
        // partial averages can change the result when the per-mapper counts differ.
        job.setReducerClass(calcGPA.IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        for(int i = 0; i < otherArgs.length - 1; ++i) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }

        FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
        System.exit(job.waitForCompletion(true)?0:1);
    }

    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        public IntSumReducer() {
        }

        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Sum this student's scores and count how many subjects were seen
            int sum = 0;
            int count = 0;
            for (IntWritable val : values) {
                sum += val.get();
                count++;
            }

            // Integer division: the average is truncated to a whole number
            int average = sum / count;
            context.write(key, new IntWritable(average));
        }
    }


    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {

        public TokenizerMapper() {
        }

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            // Each input line has the form "name score"
            StringTokenizer itr = new StringTokenizer(value.toString(), "\n");

            while(itr.hasMoreTokens()) {
                StringTokenizer iitr = new StringTokenizer(itr.nextToken());
                String name = iitr.nextToken();
                String score = iitr.nextToken();
                context.write(new Text(name), new IntWritable(Integer.parseInt(score)));
            }

        }
    }
}
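
To make the data format concrete: each input line is "name score", and each of the three subject files contributes one score per student. As a made-up illustration (the numbers are mine, not the lab data set), if math.txt, china.txt and english.txt contain 80, 90 and 85 for the same student, the reducer receives those three values, sums them to 255, divides by the count of 3, and writes 85 for that student.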

4. Single-Table Join

package lab1.task4;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class STjoin {
    public static int time = 0;

    public static class Map extends Mapper<Object, Text, Text, Text> {
        public void map(Object key,Text value,Context context)throws IOException,InterruptedException{
            String relationtype = new String();
            String line = value.toString();
            System.out.println("mapper...............");
            int i = 0;
            // Split the line into its two columns: child and parent
            String[] values = new String[10];
            StringTokenizer itr = new StringTokenizer(line);
            while(itr.hasMoreTokens()){
                values[i] = itr.nextToken();
                i = i+1;
            }

            System.out.println("child:"+values[0]+"  parent:"+values[1]);
            if(values[0].compareTo("child") != 0){ // skip the header row, whose first column is the literal "child"
                // flag "1": the key (values[1]) is a parent, and the value names one of its children
                relationtype = "1";
                context.write(new Text(values[1]),new Text(relationtype+"+"+values[0]));
                System.out.println("key:"+values[1]+"  value: "+relationtype+"+"+values[0]);
                // flag "2": the key (values[0]) is a child, and the value names one of its parents
                relationtype = "2";
                context.write(new Text(values[0]), new Text(relationtype+"+"+values[1]));
                System.out.println("key:"+values[0]+"  value: "+relationtype+"+"+values[1]);
            }
        }
    }

    public static class Reduce extends Reducer<Text, Text, Text, Text>{
        public void reduce(Text key,Iterable<Text> values,Context context) throws IOException, InterruptedException{
            System.out.println("reduce.....................");
            System.out.println("key:"+key+"  values:"+values);
            // Write the header row once, on the first call to reduce
            if(time==0){
                context.write(new Text("grandchild"), new Text("grandparent"));
                time++;
            }
            int grandchildnum = 0;
            String grandchild[] = new String[10];
            int grandparentnum = 0;
            String grandparent[] = new String[10];

            String name = new String();
            // Sort this key's records into the grandchild and grandparent lists
            for(Text val : values){
                String record = val.toString();
                System.out.println("record: "+record);

                // Each record is "1+name" or "2+name": the flag is at index 0, the name starts at index 2
                char relationtype = record.charAt(0);
                name = record.substring(2);

                System.out.println("name: "+name);

                if (relationtype=='1') {
                    grandchild[grandchildnum] = name;
                    grandchildnum++;
                }
                else{
                    grandparent[grandparentnum]=name;
                    grandparentnum++;
                }
            }
            // Cross every grandchild with every grandparent collected under this key
            if(grandparentnum!=0&&grandchildnum!=0){
                for(int m = 0 ; m < grandchildnum ; m++){
                    for(int n = 0 ; n < grandparentnum; n++){
                        context.write(new Text(grandchild[m]), new Text(grandparent[n]));
                        System.out.println("grandchild: "+grandchild[m]+"  grandparent: "+grandparent[n]);
                    }
                }
            }
        }
    }
    public static void main(String [] args)throws Exception{
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "single table join");
        job.setJarByClass(STjoin.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path("hdfs://master:9000/input4/"));
        FileOutputFormat.setOutputPath(job,new Path("hdfs://master:9000/output4/"));

        System.exit(job.waitForCompletion(true)? 0 : 1);
    }
}
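
To see what the two flags do, take a made-up child-parent table (the names are illustrative, not the lab data): a header row "child parent" followed by "Tom Lucy" and "Lucy Mary". The mapper emits each pair twice: keyed by the parent with flag "1" (key Lucy receives "1+Tom") and keyed by the child with flag "2" (key Lucy receives "2+Mary"). In the reducer, key Lucy therefore holds Tom in the grandchild list and Mary in the grandparent list, so the cross join writes the pair "Tom Mary" under the "grandchild grandparent" header.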

