云计算实验 MapReduce编程

一、实验题目
MapReduce编程
二、实验内容
本实验利用 Hadoop 提供的 Java API 进行 MapReduce 编程。
三、实验目标
掌握MapReduce编程。
理解MapReduce原理。

【实验作业】简单流量统计
有如下这样的日志文件:

13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 2481 24681 200
13726230513 00-FD-07-A4-72-B8:CMCC 120.196.40.8 i02.c.aliimg.com 248 0 200
13826230523 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 2481 24681 200
13726230533 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 2481 24681 200
13726230543 00-FD-07-A4-72-B8:CMCC 120.196.100.82 Video website 1527 2106 200
13926230553 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 2481 24681 200
13826230563 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 2481 24681 200
13926230573 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 2481 24681 200
18912688533 00-FD-07-A4-72-B8:CMCC 220.196.100.82 Integrated portal 1938 2910 200
18912688533 00-FD-07-A4-72-B8:CMCC 220.196.100.82 i02.c.aliimg.com 3333 21321 200
13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 Search Engines 9531 9531 200
13826230523 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 2481 24681 200
13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 2481 24681 200
该日志文件记录了每个手机用户在一段时间内的网络流量信息,具体字段含义为:

手机号码 MAC地址 IP地址 域名 上行流量(字节数) 下行流量(字节数) 套餐类型
根据以上日志,统计出每个手机用户在该时间段内的总流量(上行流量+下行流量),统计结果的格式为:

手机号码 字节数量

实验结果
在这里插入图片描述
实验代码

WcMap.java

import java.io.IOException;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

        /**
         * Mapper of the traffic-statistics job: turns each log line into one
         * {@code (phone number, upstream bytes + downstream bytes)} pair.
         *
         * <p>A log line is a whitespace-separated record. Its first field is the phone number
         * and its last three fields are the upstream byte count, the downstream byte count and
         * one trailing field. The two traffic fields are addressed from the END of the record
         * because the domain-name field in the middle may itself contain spaces
         * (for example {@code Video website}).
         *
         * <p>Blank lines are ignored. Lines with too few fields or with non-numeric traffic
         * fields are skipped and counted in the {@code WcMap / MALFORMED_LINES} job counter
         * instead of failing the task.
         */
        public class WcMap extends Mapper<LongWritable, Text, Text, LongWritable>{
            /** Fewest fields a usable record can have: phone, upstream, downstream, trailing field. */
            private static final int MIN_FIELDS = 4;

            /** Counter group and name used to report skipped input lines. */
            private static final String COUNTER_GROUP = "WcMap";
            private static final String MALFORMED_LINES = "MALFORMED_LINES";

            // Output objects are reused across calls to avoid two allocations per record.
            private final Text phone = new Text();
            private final LongWritable traffic = new LongWritable();

            /**
             * Emits the total traffic of a single log line keyed by its phone number.
             *
             * @param key     input key supplied by the framework; not used
             * @param value   one line of the log file
             * @param context job context the output pair is written to
             * @throws IOException          if writing the output pair fails
             * @throws InterruptedException if the task is interrupted while writing
             */
            @Override
            protected void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
                String line = value.toString().trim();
                if (line.isEmpty()) {
                    return;
                }

                String[] fields = line.split("\\s+");
                if (fields.length < MIN_FIELDS) {
                    // With fewer fields the "traffic" positions would overlap the phone number.
                    context.getCounter(COUNTER_GROUP, MALFORMED_LINES).increment(1);
                    return;
                }

                long total;
                try {
                    // long, not int: byte counts above 2^31-1 must not overflow or be rejected.
                    long upstream = Long.parseLong(fields[fields.length - 3]);
                    long downstream = Long.parseLong(fields[fields.length - 2]);
                    total = upstream + downstream;
                } catch (NumberFormatException e) {
                    context.getCounter(COUNTER_GROUP, MALFORMED_LINES).increment(1);
                    return;
                }

                phone.set(fields[0]);
                traffic.set(total);
                context.write(phone, traffic);
            }
        }

WcReduce.java

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer of the traffic-statistics job: adds up every value received for a key and
 * emits the key together with that total.
 */
public class WcReduce extends Reducer<Text, LongWritable, Text, LongWritable>{
    /**
     * Sums all values of one key and writes a single {@code (key, sum)} pair.
     *
     * @param key     the key whose values are being aggregated
     * @param values  the values grouped under {@code key}
     * @param context job context the result is written to
     * @throws IOException          if writing the result fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long total = 0L;
        java.util.Iterator<LongWritable> remaining = values.iterator();
        while (remaining.hasNext()) {
            total = total + remaining.next().get();
        }
        LongWritable sum = new LongWritable(total);
        context.write(key, sum);
    }
}

WcRunner.java

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.util.Scanner;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import java.net.URI;

/**
 * Driver of the traffic-statistics job.
 *
 * <p>It reads the HDFS input and output paths from standard input, uploads the bundled sample
 * file, removes a previous output directory, runs the job with {@link WcMap} and
 * {@link WcReduce}, and finally prints the first reducer output file to standard output.
 */
public class WcRunner{
    /** Address of the HDFS instance every path in this driver is resolved against. */
    private static final String HDFS_URI = "hdfs://master:9000";

    /** Local sample log and the HDFS location it is copied to before the job runs. */
    private static final String LOCAL_SAMPLE = "/headless/Desktop/workspace/mapreduce/WordCount/data/1.txt";
    private static final String HDFS_SAMPLE = "/mapreduce/WordCount/input/1.txt";

    /**
     * Runs the job end to end.
     *
     * @param args not used; both paths are read interactively
     * @throws IOException            if HDFS cannot be reached, the old output cannot be
     *                                removed, or the result file cannot be read
     * @throws ClassNotFoundException if a job class cannot be loaded
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(WcRunner.class);

        job.setMapperClass(WcMap.class);
        job.setReducerClass(WcReduce.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // The scanner is deliberately not closed: closing it would close System.in.
        Scanner sc = new Scanner(System.in);
        System.out.print("inputPath:");
        String inputPath = sc.next();
        System.out.print("outputPath:");
        String outputPath = sc.next();

        // URI.create avoids the checked URISyntaxException of new URI(...) for a constant.
        FileSystem fs = FileSystem.get(URI.create(HDFS_URI), conf);

        uploadSample(fs);
        deleteOldOutput(fs, outputPath);

        FileInputFormat.setInputPaths(job, new Path(HDFS_URI + inputPath));
        FileOutputFormat.setOutputPath(job, new Path(HDFS_URI + outputPath));

        if (!job.waitForCompletion(true)) {
            // Without this check a failed job would go on to read a missing result file.
            System.err.println("Job failed; there are no results to display.");
            System.exit(1);
        }

        printResults(fs, outputPath);
    }

    /**
     * Copies the bundled sample log into HDFS. A failure only produces a warning so that
     * it cannot prevent the old output directory from being removed afterwards.
     */
    private static void uploadSample(FileSystem fs) {
        try {
            fs.copyFromLocalFile(new Path(LOCAL_SAMPLE), new Path(HDFS_SAMPLE));
        } catch (IOException e) {
            System.err.println("Warning: could not upload " + LOCAL_SAMPLE + ": " + e);
        }
    }

    /** Recursively deletes the output directory and reports when something was removed. */
    private static void deleteOldOutput(FileSystem fs, String outputPath) throws IOException {
        if (fs.delete(new Path(outputPath), true)) {
            System.out.println("Directory "+ outputPath +" has been deleted successfully!");
        }
    }

    /** Prints {@code part-r-00000} of the output directory line by line, decoded as UTF-8. */
    private static void printResults(FileSystem fs, String outputPath) throws IOException {
        Path resultFile = new Path(outputPath + "/part-r-00000");
        // try-with-resources closes the stream even when reading fails part-way.
        try (FSDataInputStream in = fs.open(resultFile);
             java.io.BufferedReader reader = new java.io.BufferedReader(
                     new java.io.InputStreamReader(in, java.nio.charset.StandardCharsets.UTF_8))) {
            System.out.println("Results:");
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}

what’s up,这个云计算实验也太多了吧,不写了

我又写了一题

(二)【实验作业】索引倒排输出行号
在索引倒排实验中,我们可以得到每个单词分布在哪些文件中,以及在每个文件中出现的次数,修改以上实现,在输出的倒排索引结果中可以得到每个单词在每个文件中的具体行号信息。输出结果的格式如下:
单词 文件名:行号,文件名:行号,文件名:行号

实验结果:
MapReduce在3.txt的第一行出现了两次所以有两个1
在这里插入图片描述

import java.io.*;
import java.util.StringTokenizer;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

/**
 * Mapper of the inverted-index job.
 *
 * <p>For every whitespace-delimited token of an input line it emits the key
 * {@code "<token> <file name>"} with, as value, the number of {@code map} calls this mapper
 * instance has received so far. That count is used as the line number.
 *
 * <p>NOTE(review): the counter belongs to the mapper instance, so it matches the real line
 * number only if one instance sees a file from its first line onwards. Confirm this holds
 * for inputs that are split across several mappers.
 */
public class MyMapper extends Mapper<Object,Text,Text,Text>{
    // Reused output objects.
    private Text outKey = new Text();
    private Text outValue = new Text();
    private FileSplit split;
    // Number of map() calls so far; incremented before each line is processed.
    int num=0;

    /**
     * Emits one {@code ("token fileName", lineNumber)} pair per token of the line.
     *
     * @param key     input key supplied by the framework; not used
     * @param value   one line of the input file
     * @param context job context providing the input split and receiving the output
     * @throws IOException          if writing an output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    public void map(Object key,Text value,Context context)
            throws IOException,InterruptedException{
        num = num + 1;
        split = (FileSplit) context.getInputSplit();

        StringTokenizer tokens = new StringTokenizer(value.toString());
        if (!tokens.hasMoreTokens()) {
            return; // blank line: nothing to index
        }

        String fileName = split.getPath().getName();
        String lineNumber = String.valueOf(num);
        do {
            outKey.set(tokens.nextToken() + " " + fileName);
            outValue.set(lineNumber);
            context.write(outKey, outValue);
        } while (tokens.hasMoreTokens());
    }
}

import java.io.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Combiner of the inverted-index job.
 *
 * <p>It receives keys of the form {@code "<word> <file name>"} with line numbers as values
 * and rewrites them to the key {@code <word>} with the single value
 * {@code "<file name>:<n1> <n2> ... "} (each line number followed by one space).
 *
 * <p>A key without a space is treated as a record this combiner has already rewritten and
 * is passed through unchanged, so applying the combiner more than once is harmless.
 *
 * <p>NOTE(review): the reducer only sees per-word keys if this combiner has run. Hadoop
 * documents combiners as an optional optimisation, so confirm the job configuration
 * guarantees it is applied.
 */
public class MyCombiner extends Reducer<Text,Text,Text,Text>{

    private Text info = new Text();

    /**
     * Collapses all line numbers of one (word, file) pair into a single posting.
     *
     * @param key     {@code "<word> <file name>"}, or an already rewritten {@code <word>}
     * @param values  line numbers, or already combined postings for a rewritten key
     * @param context job context the result is written to
     * @throws IOException          if writing the result fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    public void reduce(Text key,Iterable<Text>values,Context context)
            throws IOException, InterruptedException{
        String record = key.toString();
        // Words come from a whitespace tokenizer and cannot contain a space, so the FIRST
        // space separates word from file name; the file name itself may contain spaces.
        int separator = record.indexOf(' ');

        if (separator < 0) {
            // Already combined: forward as is instead of failing on the missing file name.
            for (Text value : values) {
                context.write(key, value);
            }
            return;
        }

        StringBuilder lineNumbers = new StringBuilder();
        for (Text value : values) {
            lineNumbers.append(value.toString()).append(' ');
        }

        String word = record.substring(0, separator);
        String fileName = record.substring(separator + 1);

        key.set(word);
        info.set(fileName + ":" + lineNumbers);
        context.write(key, info);
    }
}
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer of the inverted-index job: concatenates every value received for a key,
 * appending {@code " ; "} after each one, and emits the key with that concatenation.
 */
public class MyReducer extends Reducer<Text,Text,Text,Text>{
    private Text result = new Text();

    /**
     * Joins all values of one key into a single output value.
     *
     * @param key     the key whose values are being joined
     * @param values  the values grouped under {@code key}
     * @param context job context the result is written to
     * @throws IOException          if writing the result fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    public void reduce(Text key,Iterable<Text>values,Context context) throws
            IOException, InterruptedException{
        // StringBuilder keeps the join linear; repeated String += copies the text each time.
        StringBuilder joined = new StringBuilder();
        for (Text value : values) {
            joined.append(value.toString()).append(" ; ");
        }
        result.set(joined.toString());
        context.write(key,result);
    }
}
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.util.Scanner;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import java.net.URI;

/**
 * Driver of the inverted-index job.
 *
 * <p>It reads the HDFS input and output paths from standard input, removes a previous output
 * directory, runs the job with {@link MyMapper}, {@link MyCombiner} and {@link MyReducer},
 * and finally prints the first reducer output file to standard output.
 */
public class MyRunner {
    /** Address of the HDFS instance every path in this driver is resolved against. */
    private static final String HDFS_URI = "hdfs://master:9000";

    /**
     * Runs the job end to end.
     *
     * @param args not used; both paths are read interactively
     * @throws IOException            if HDFS cannot be reached, the old output cannot be
     *                                removed, or the result file cannot be read
     * @throws ClassNotFoundException if a job class cannot be loaded
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf);

        job.setJarByClass(MyRunner.class);

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        // MyCombiner rewrites the map output key from "word file" to "word".
        job.setCombinerClass(MyCombiner.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // The scanner is deliberately not closed: closing it would close System.in.
        Scanner sc = new Scanner(System.in);
        System.out.print("inputPath:");
        String inputPath = sc.next();
        System.out.print("outputPath:");
        String outputPath = sc.next();

        // URI.create avoids the checked URISyntaxException of new URI(...) for a constant.
        FileSystem fs = FileSystem.get(URI.create(HDFS_URI), conf);

        deleteOldOutput(fs, outputPath);

        FileInputFormat.setInputPaths(job, new Path(HDFS_URI + inputPath));
        FileOutputFormat.setOutputPath(job, new Path(HDFS_URI + outputPath));

        if (!job.waitForCompletion(true)) {
            // Without this check a failed job would go on to read a missing result file.
            System.err.println("Job failed; there are no results to display.");
            System.exit(1);
        }

        printResults(fs, outputPath);
    }

    /** Recursively deletes the output directory and reports when something was removed. */
    private static void deleteOldOutput(FileSystem fs, String outputPath) throws IOException {
        if (fs.delete(new Path(outputPath), true)) {
            System.out.println("Directory "+ outputPath +" has been deleted successfully!");
        }
    }

    /** Prints {@code part-r-00000} of the output directory line by line, decoded as UTF-8. */
    private static void printResults(FileSystem fs, String outputPath) throws IOException {
        Path resultFile = new Path(outputPath + "/part-r-00000");
        // try-with-resources closes the stream even when reading fails part-way.
        try (FSDataInputStream in = fs.open(resultFile);
             java.io.BufferedReader reader = new java.io.BufferedReader(
                     new java.io.InputStreamReader(in, java.nio.charset.StandardCharsets.UTF_8))) {
            System.out.println("Results:");
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}

后面的没写了

  • 9
    点赞
  • 24
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
以下是一个简单的实现,实现了学生信息的添加和查看、实验课程信息的添加和查看、选课功能等。由于篇幅限制,代码仅供参考: ```c++ #include <iostream> #include <string> #include <vector> #include <map> using namespace std; // 学生类 class Student { private: string id; // 学号 string name; // 姓名 string major; // 专业 int grade; // 年级 vector<string> courseList; // 已选课程列表 public: Student(string id, string name, string major, int grade) { this->id = id; this->name = name; this->major = major; this->grade = grade; } string getId() { return id; } string getName() { return name; } string getMajor() { return major; } int getGrade() { return grade; } // 添加已选课程 bool addCourse(string courseName) { if (courseList.size() >= 2) { // 每学期必修学分 cout << "每学期只能选两门实验课程!" << endl; return false; } for (string course : courseList) { // 检查是否已选该课程 if (course == courseName) { cout << "不能重复选课!" << endl; return false; } } courseList.push_back(courseName); cout << "选课成功!" << endl; return true; } // 查看已选课程 void viewCourseList() { if (courseList.empty()) { cout << "未选课程!" << endl; } else { cout << "已选课程:" << endl; for (string course : courseList) { cout << course << endl; } } } }; // 实验课程类 class Course { private: string name; // 课程名称 string labName; // 实验室名称 int maxNum; // 最大选课人数 vector<Student*> studentList; // 已选学生列表 public: Course(string name, string labName, int maxNum) { this->name = name; this->labName = labName; this->maxNum = maxNum; } string getName() { return name; } string getLabName() { return labName; } int getMaxNum() { return maxNum; } // 添加已选学生 bool addStudent(Student* student) { if (studentList.size() >= maxNum) { cout << "选课人数已满!" << endl; return false; } for (Student* s : studentList) { // 检查是否重复选课 if (s->getId() == student->getId()) { cout << "不能重复选课!" << endl; return false; } } studentList.push_back(student); cout << "选课成功!" << endl; return true; } // 查看已选学生 void viewStudentList() { if (studentList.empty()) { cout << "未选学生!" 
<< endl; } else { cout << "已选学生:" << endl; for (Student* student : studentList) { cout << "学号:" << student->getId() << ",姓名:" << student->getName() << ",专业:" << student->getMajor() << ",年级:" << student->getGrade() << endl; } } } }; // 选课管理系统 class CourseSelectionSystem { private: map<string, Course*> courseMap; // 实验课程map,key为课程名称 vector<Student*> studentList; // 学生列表 public: // 添加学生 void addStudent() { cout << "请输入学号:"; string id; cin >> id; cout << "请输入姓名:"; string name; cin >> name; cout << "请输入专业:"; string major; cin >> major; cout << "请输入年级:"; int grade; cin >> grade; Student* student = new Student(id, name, major, grade); studentList.push_back(student); cout << "添加成功!" << endl; } // 查看学生信息 void viewStudentList() { if (studentList.empty()) { cout << "学生列表为空!" << endl; } else { cout << "学生列表:" << endl; for (Student* student : studentList) { cout << "学号:" << student->getId() << ",姓名:" << student->getName() << ",专业:" << student->getMajor() << ",年级:" << student->getGrade() << endl; student->viewCourseList(); // 查看已选课程 } } } // 添加实验课程 void addCourse() { cout << "请输入课程名称:"; string name; cin >> name; cout << "请输入实验室名称:"; string labName; cin >> labName; cout << "请输入最大选课人数:"; int maxNum; cin >> maxNum; Course* course = new Course(name, labName, maxNum); courseMap[name] = course; cout << "添加成功!" << endl; } // 查看实验课程信息 void viewCourseList() { if (courseMap.empty()) { cout << "实验课程列表为空!" << endl; } else { cout << "实验课程列表:" << endl; for (auto& kv : courseMap) { Course* course = kv.second; cout << "课程名称:" << course->getName() << ",实验室名称:" << course->getLabName() << ",最大选课人数:" << course->getMaxNum() << endl; course->viewStudentList(); // 查看已选学生 } } } // 学生选课 void selectCourse() { cout << "请输入学号:"; string id; cin >> id; Student* student = nullptr; for (Student* s : studentList) { if (s->getId() == id) { student = s; break; } } if (student == nullptr) { cout << "学号不存在!" 
<< endl; return; } cout << "请输入要选的课程名称:"; string courseName; cin >> courseName; Course* course = courseMap[courseName]; if (course == nullptr) { cout << "课程不存在!" << endl; return; } course->addStudent(student); student->addCourse(courseName); } }; int main() { CourseSelectionSystem css; while (true) { cout << "请选择操作:" << endl; cout << "1. 添加学生" << endl; cout << "2. 查看学生信息" << endl; cout << "3. 添加实验课程" << endl; cout << "4. 查看实验课程信息" << endl; cout << "5. 学生选课" << endl; cout << "0. 退出" << endl; int choice; cin >> choice; switch (choice) { case 1: css.addStudent(); break; case 2: css.viewStudentList(); break; case 3: css.addCourse(); break; case 4: css.viewCourseList(); break; case 5: css.selectCourse(); break; case 0: return 0; default: cout << "输入错误,请重新输入!" << endl; break; } } return 0; } ``` 需要注意的地方有: 1. 学生和实验课程的信息需要使用类来封装,并且需要定义相应的getter和setter方法。 2. 学生和实验课程需要使用vector和map来存储,方便后续的查找和添加操作。 3. 选课需要检查学生是否已选该课程、每学期选课学分是否超过两门、选课人数是否已满等条件,需要仔细判断和处理。 4. 代码中使用了指针来存储学生和实验课程的对象,需要注意内存管理的问题。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

wow_awsl_qwq

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值