A MapReduce Implementation - MIT 6.824 Distributed Systems Lab 1

Lab page: http://nil.csail.mit.edu/6.824/2022/labs/lab-mr.html

Following the requirements and guidance on that page, this post walks through a simple implementation of distributed MapReduce.

Code repository: Pokhanov/MIT6.824 Distributed System Lab (gitee.com)

Prerequisites

The MapReduce model: paper summary of "MapReduce: Simplified Data Processing on Large Clusters" (CSDN blog)

Goal

Implement a distributed MapReduce. The system consists of two parts:

  • the coordinator program (the "master" in the paper summary)
  • the worker program (the "worker" copies in the paper summary)

The coordinator is responsible for assigning tasks to workers and for rescheduling when a worker fails, ensuring that the whole job completes.

A worker executes the actual tasks, which involves reading input, processing data (including calling the user-supplied Map and Reduce functions), and writing output to designated files.

At runtime, one coordinator process and multiple worker processes are started (in parallel). Each worker process interacts with the coordinator via RPC (remote procedure call) to request tasks and return results.
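
Concretely, the lab skeleton uses Go's net/rpc, which only exposes exported methods of a fixed shape: two pointer arguments (args in, reply out) and an error return. Every coordinator handler below follows this pattern; ExampleArgs and ExampleReply here are placeholder names, not part of the lab code:

// Any exported method of this shape on a registered object becomes
// callable over RPC.
func (c *Coordinator) Example(args *ExampleArgs, reply *ExampleReply) error {
	return nil
}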

In a real-world MapReduce system the workers run on different machines, but in this lab all processes run on a single machine.
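
For reference, the lab page builds the word-count plugin and runs the system from src/main roughly like this (the worker command goes in one or more separate terminals):

go build -buildmode=plugin ../mrapps/wc.go
rm mr-out*
go run mrcoordinator.go pg-*.txt
go run mrworker.go wc.so
cat mr-out-* | sort | more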

Implementation

1.mrcoordinator.go

Entry point of the coordinator program.

package main

//
// start the coordinator process, which is implemented
// in ../mr/coordinator.go
//
// go run mrcoordinator.go pg*.txt
//
// Please do not change this file.
//

import (
	"fmt"
	"os"
	"time"

	"6.824/mr"
)

func main() {
	if len(os.Args) < 2 {
		fmt.Fprintf(os.Stderr, "Usage: mrcoordinator inputfiles...\n")
		os.Exit(1)
	}

	m := mr.MakeCoordinator(os.Args[1:], 10)
	for m.Done() == false {
		time.Sleep(time.Second)
	}

	time.Sleep(time.Second)
	fmt.Printf("调度进程结束 \n")
}
2.coordinator.go

The concrete implementation of the coordinator. Its main job is to manage task state and schedule tasks, including timing each task's execution and reassigning any task that exceeds a limit (10s in this implementation).

package mr

import (
	"errors"
	"fmt"
	"log"
	"net"
	"net/http"
	"net/rpc"
	"os"
	"sync"
	"time"
)

type Coordinator struct {
	processingTasks   map[int]*TaskCor // tasks currently being processed
	unstartTasks      map[int]*TaskCor // tasks not yet started
	finishedTasks     map[int]*TaskCor // finished tasks
	mu                sync.Mutex       // main lock
	mu2               sync.Mutex       // second lock used by reAssignTask (sync.Mutex is not reentrant)
	intermediateFiles [][]string       // intermediate files, indexed by reduce bucket
	nReduce           int              // number of Reduce tasks, supplied by the user
	outputFiles       []string         // output files
	ret               bool             // whether the whole job is done
	crashedWorkers    []int            // uids of workers considered crashed
}

type TaskCor struct {
	Id                    int      // task id
	ExcutedTime           int      // seconds the task has been executing
	Category              string   // task category: Map/Reduce
	Split                 []string // input file names
	ResiponsibleWorkerUid int      // uid of the worker responsible for this task
	mu                    sync.Mutex
}

var inputFileNum int // number of input files (= number of Map tasks)

// Your code here -- RPC handlers for the worker to call.
// Handle AskForNewTask requests from worker processes.
func (c *Coordinator) AssignTask(args *AskForNewTaskArgs, reply *AskForNewTaskReply) error {
	c.mu.Lock()

	if len(c.unstartTasks) == 0 {
		err := errors.New("无未开始任务,不分配任务 ")
		c.mu.Unlock()
		return err
	}

	if contains(&c.crashedWorkers, args.WorkerUid) != -1 {
		err := errors.New("当前请求的woker已被定义为crashed,不分配任务 ")
		c.mu.Unlock()
		return err
	}

	for taskId, task := range c.unstartTasks {
		fmt.Printf("分配任务 %v \n", task.Id)

		replyTask := Task{}
		replyTask.Id = task.Id
		replyTask.Category = task.Category
		replyTask.Split = task.Split

		reply.Task = &replyTask
		reply.NReduce = c.nReduce

		task.ResiponsibleWorkerUid = args.WorkerUid

		// Move the task: unstarted -> processing.
		delete(c.unstartTasks, taskId)
		c.processingTasks[taskId] = task
		break
	}
	c.mu.Unlock()
	return nil
}

func (c *Coordinator) MapTaskFinish(args *MapTaskFinishArgs, reply *MapTaskFinishReply) error {
	c.mu.Lock()
	taskCor := c.processingTasks[args.Task.Id]
	if taskCor == nil {
		fmt.Printf("MAP-完成任务不在processing列表中,不保存结果 \n")
		c.mu.Unlock()
		return nil
	}

	if args.WorkerUid != taskCor.ResiponsibleWorkerUid {
		fmt.Printf("MAP-完成woker与任务负责woker uid不符,重新执行 \n")
		c.mu.Unlock()
		return nil
	}

	// Record the intermediate files reported by the worker.
	for i := 0; i < len(args.FinishedMidFiles); i++ {
		for j := 0; j < len(args.FinishedMidFiles[i]); j++ {
			c.intermediateFiles[i] = append(c.intermediateFiles[i], args.FinishedMidFiles[i][j])
		}
	}

	// Move the task: processing -> finished.
	delete(c.processingTasks, args.Task.Id)
	c.finishedTasks[args.Task.Id] = taskCor

	// Reduce may only start after all Map tasks have finished.
	// Check each reduce bucket: once it holds one intermediate file per input file,
	// add the corresponding Reduce task to unstartTasks.
	for i := 0; i < len(c.intermediateFiles); i++ {
		if len(c.intermediateFiles[i]) == inputFileNum {
			taskCor := TaskCor{}
			taskCor.Id = i
			taskCor.Category = "Reduce"
			taskCor.Split = c.intermediateFiles[i]

			c.unstartTasks[taskCor.Id] = &taskCor
		}
	}

	c.mu.Unlock()
	return nil
}

func (c *Coordinator) ReduceTaskFinish(args *ReduceTaskFinishArgs, reply *ReduceTaskFinishReply) error {
	c.mu.Lock()

	taskCor := c.processingTasks[args.Task.Id]
	if taskCor == nil {
		fmt.Printf("Reduce-完成任务不在processing列表中,不保存结果 \n")
		c.mu.Unlock()
		return nil
	}

	if args.WorkerUid != taskCor.ResiponsibleWorkerUid {
		fmt.Printf("Reduce-完成woker与任务负责woker uid不符,重新执行 \n")
		c.mu.Unlock()
		return nil
	}

	// Append the output file reported by the worker to the output list.
	c.outputFiles = append(c.outputFiles, args.OutPutFileName)

	// Move the task: processing -> finished.
	delete(c.processingTasks, args.Task.Id)
	c.finishedTasks[args.Task.Id] = taskCor

	// The whole job is done once all nReduce output files exist.
	if len(c.outputFiles) == c.nReduce {
		c.ret = true
	}

	c.mu.Unlock()
	return nil
}

// start a thread that listens for RPCs from worker.go
func (c *Coordinator) server() {
	rpc.Register(c)
	rpc.HandleHTTP()
	//l, e := net.Listen("tcp", ":1234")
	sockname := coordinatorSock()
	os.Remove(sockname)
	l, e := net.Listen("unix", sockname)
	if e != nil {
		log.Fatal("listen error:", e)
	}
	go http.Serve(l, nil)
}

// main/mrcoordinator.go calls Done() periodically to find out
// if the entire job has finished.
func (c *Coordinator) Done() bool {
	c.mu.Lock()
	res := c.ret
	c.mu.Unlock()
	return res
}

// create a Coordinator.
// main/mrcoordinator.go calls this function.
// nReduce is the number of reduce tasks to use.
func MakeCoordinator(files []string, nReduce int) *Coordinator {
	// Initialize the coordinator's state.
	c := Coordinator{}
	c.nReduce = nReduce
	c.unstartTasks = make(map[int]*TaskCor)
	c.processingTasks = make(map[int]*TaskCor)
	c.finishedTasks = make(map[int]*TaskCor)
	c.ret = false
	inputFileNum = len(files)
	for i := 0; i < nReduce; i++ {
		var curList []string
		c.intermediateFiles = append(c.intermediateFiles, curList)
	}

	// Create one Map task per input file.
	for i := 0; i < len(files); i++ {
		taskCor := TaskCor{}
		// Map task ids start above nReduce so they cannot collide with
		// Reduce task ids, which are the bucket indices 0..nReduce-1.
		taskCor.Id = nReduce + 1 + i
		taskCor.Category = "Map"
		taskCor.Split = append(taskCor.Split, files[i])
		c.mu.Lock()
		c.unstartTasks[taskCor.Id] = &taskCor
		c.mu.Unlock()
	}

	c.server() // start listening for worker RPCs

	go TimeTaskExcution(&c) // 开启任务处理计时线程

	return &c
}

// Track how long each in-progress task has been running.
func TimeTaskExcution(c *Coordinator) {
	for {
		c.mu.Lock()

		// Tick the timer of every in-progress task.
		for _, taskCor := range c.processingTasks {

			taskCor.mu.Lock()
			taskCor.ExcutedTime += 1
			taskCor.mu.Unlock()

			// If the task has been executing for 10s or more, reassign it.
			if taskCor.ExcutedTime >= 10 {
				fmt.Printf("unstarted tasks: %v\n", c.unstartTasks)
				fmt.Printf("processing tasks: %v\n", c.processingTasks)
				fmt.Printf("finished tasks: %v\n", c.finishedTasks)

				fmt.Printf("task %v timed out, to be reassigned\n", taskCor.Id)
				fmt.Printf("processing tasks before reassignment: %v\n", c.processingTasks)
				reAssignTask(c, taskCor) // reassign this task
				fmt.Printf("processing tasks after reassignment: %v\n", c.processingTasks)

			}
		}

		c.mu.Unlock()

		time.Sleep(time.Second) // sleep 1s between ticks
	}
}

// Reassign a timed-out task.
func reAssignTask(c *Coordinator, taskCor *TaskCor) {
	c.mu2.Lock()

	// Mark the worker that was running this task as crashed.
	c.crashedWorkers = append(c.crashedWorkers, taskCor.ResiponsibleWorkerUid)

	// Move the task: processing -> unstarted.
	delete(c.processingTasks, taskCor.Id)
	c.unstartTasks[taskCor.Id] = taskCor

	// Reset the execution timer and the responsible worker uid.
	taskCor.mu.Lock()
	taskCor.ExcutedTime = 0
	taskCor.ResiponsibleWorkerUid = 0
	taskCor.mu.Unlock()

	c.mu2.Unlock()
}

// Helper: return the index of aim in list, or -1 if it is absent.
func contains(list *[]int, aim int) int {
	for index, value := range *list {
		if aim == value {
			return index
		}
	}
	return -1
}
3.mrworker.go

Entry point of the worker program.

package main

//
// start a worker process, which is implemented
// in ../mr/worker.go. typically there will be
// multiple worker processes, talking to one coordinator.
//
// go run mrworker.go wc.so
//
// Please do not change this file.
//

import (
	"fmt"
	"log"
	"os"
	"plugin"

	"6.824/mr"
)

func main() {
	if len(os.Args) != 2 {
		fmt.Fprintf(os.Stderr, "Usage: mrworker xxx.so\n")
		os.Exit(1)
	}

	mapf, reducef := loadPlugin(os.Args[1])

	mr.Worker(mapf, reducef)
}

// load the application Map and Reduce functions
// from a plugin file, e.g. ../mrapps/wc.so
func loadPlugin(filename string) (func(string, string) []mr.KeyValue, func(string, []string) string) {
	p, err := plugin.Open(filename)
	if err != nil {
		log.Fatalf("cannot load plugin %v", err.Error())
	}
	xmapf, err := p.Lookup("Map")
	if err != nil {
		log.Fatalf("cannot find Map in %v", filename)
	}
	mapf := xmapf.(func(string, string) []mr.KeyValue)
	xreducef, err := p.Lookup("Reduce")
	if err != nil {
		log.Fatalf("cannot find Reduce in %v", filename)
	}
	reducef := xreducef.(func(string, []string) string)

	return mapf, reducef
}
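
loadPlugin expects the plugin to export Map and Reduce functions with exactly the signatures asserted above. For reference, the word-count application in ../mrapps/wc.go implements them roughly like this (a simplified sketch of the lab's own wc.go):

package main

import (
	"strconv"
	"strings"
	"unicode"

	"6.824/mr"
)

// Map splits the file contents into words and emits <word, "1"> for each one.
func Map(filename string, contents string) []mr.KeyValue {
	ff := func(r rune) bool { return !unicode.IsLetter(r) }
	words := strings.FieldsFunc(contents, ff)
	kva := []mr.KeyValue{}
	for _, w := range words {
		kva = append(kva, mr.KeyValue{Key: w, Value: "1"})
	}
	return kva
}

// Reduce returns the number of occurrences of the word.
func Reduce(key string, values []string) string {
	return strconv.Itoa(len(values))
}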
4.worker.go

The concrete implementation of the worker. Its main job is to execute Map/Reduce tasks, which involves reading input, calling the user-supplied mapf/reducef functions, and writing output to files.

package mr

import (
	"encoding/json"
	"fmt"
	"hash/fnv"
	"io/ioutil"
	"log"
	"net/rpc"
	"os"
	"sort"
	"strconv"
	"strings"
	"time"
)

// for sorting by key.
type ByKey []KeyValue

// for sorting by key.
func (a ByKey) Len() int           { return len(a) }
func (a ByKey) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key }

// Map functions return a slice of KeyValue.
type KeyValue struct {
	Key   string
	Value string
}

// use ihash(key) % NReduce to choose the reduce
// task number for each KeyValue emitted by Map.
func ihash(key string) int {
	h := fnv.New32a()
	h.Write([]byte(key))
	return int(h.Sum32() & 0x7fffffff)
}

type Task struct {
	Id       int      // task id
	Category string   // task category: Map/Reduce
	Split    []string // input file names
}

var waitingTime int // seconds this worker has waited without receiving a task
var uid int         // this worker's uid

// main/mrworker.go calls this function.
func Worker(mapf func(string, string) []KeyValue,
	reducef func(string, []string) string) {

	uid = int(time.Now().UnixNano()) // use the start time in nanoseconds as a unique-ish uid
	fmt.Printf("worker %v started\n", uid)

	for {
		reply := AskForNewTask()

		if reply == nil || reply.Task == nil {
			// No task was assigned; wait a second and retry.
			time.Sleep(time.Second)
			waitingTime += 1
			// Exit after waiting too long (30s) without any work.
			if waitingTime >= 30 {
				fmt.Printf("worker %v exiting\n", uid)
				return
			}
		} else {
			// Execute the assigned task.
			curTask := reply.Task
			waitingTime = 0 // reset waitingTime
			if curTask.Category == "Map" {
				Map(mapf, reply)
			}
			if curTask.Category == "Reduce" {
				Reduce(reducef, reply)
			}
		}
	}
}

func Map(mapf func(string, string) []KeyValue, reply *AskForNewTaskReply) {
	curTask := reply.Task
	var midFileMap map[int]*os.File

	for _, filename := range curTask.Split {
		// Read the input. Note: this path prefix is specific to the author's machine.
		filePath := "/root/go/src/6.824/src/main/" + filename
		file, err := os.Open(filePath)
		if err != nil {
			log.Fatalf("cannot open %v", err.Error())
		}
		content, err := ioutil.ReadAll(file)
		if err != nil {
			log.Fatalf("cannot read %v", filename)
		}
		file.Close()

		// Run the user-supplied mapf on the input, then sort the output by key.
		kva := mapf(filename, string(content))
		sort.Sort(ByKey(kva))

		// Intermediate key/value pairs must be partitioned into nReduce files.
		// midFileMap: ihash(key) % nReduce -> file.
		// (Each Map task has exactly one input file, so this runs once.)
		midFileMap = ConstructMidFileMap(reply.NReduce, curTask.Id)
		for _, kv := range kva {
			// Bug fix: partition by reply.NReduce instead of a hardcoded 10,
			// which only worked because the lab happens to use nReduce = 10.
			file := midFileMap[ihash(kv.Key)%reply.NReduce]
			enc := json.NewEncoder(file)
			err := enc.Encode(&kv)
			if err != nil {
				log.Fatalf("failed to write intermediate key/value pair: %v", err)
			}
		}
	}

	// To keep a Reduce task from reading the output of a Map task that has
	// not finished, rename each temporary file to its final name only after
	// its contents have been fully written.
	var finishedMidFiles [][]string
	for i := 0; i < reply.NReduce; i++ {
		var curList []string
		finishedMidFiles = append(finishedMidFiles, curList)
	}
	for k, v := range midFileMap {
		v.Close() // close the file before renaming it
		finalNameSlices := strings.Split(v.Name(), "-")
		finalName := "mr-" + finalNameSlices[1] + "-" + finalNameSlices[2]
		os.Rename(v.Name(), finalName)
		finishedMidFiles[k] = append(finishedMidFiles[k], finalName)
	}

	// Report the intermediate file lists back to the coordinator.
	args := MapTaskFinishArgs{}
	args.Task = curTask
	args.FinishedMidFiles = finishedMidFiles
	args.WorkerUid = uid

	ok := call("Coordinator.MapTaskFinish", &args, &reply)
	if ok {
		fmt.Printf("task %v 已完成\n", reply.Task.Id)
	} else {
		fmt.Printf("call failed!\n")
	}
}

// Build midFileMap: ihash(key) % nReduce -> temporary file.
func ConstructMidFileMap(nReduce int, taskId int) map[int]*os.File {
	midFileMap := make(map[int]*os.File)
	for i := 0; i < nReduce; i++ {
		tmpFileName := "mr-" + strconv.Itoa(taskId) + "-" + strconv.Itoa(i) + "-tmp"
		ofile, _ := os.Create(tmpFileName)
		midFileMap[i] = ofile
	}
	return midFileMap
}

func Reduce(reducef func(string, []string) string, reply *AskForNewTaskReply) {
	curTask := reply.Task
	intermediate := GetIntermediate(curTask.Split) // read the intermediate key/value pairs

	// Create the output file.
	outPutFileName := "mr-out-" + strconv.Itoa(curTask.Id)
	ofile, _ := os.Create(outPutFileName)

	i := 0
	for i < len(intermediate) {
		// Group together values that share the same key.
		j := i + 1
		for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key {
			j++
		}
		values := []string{}
		for k := i; k < j; k++ {
			values = append(values, intermediate[k].Value)
		}

		// Call the user-supplied reducef on the grouped values.
		output := reducef(intermediate[i].Key, values)

		// Write the result to the output file.
		fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output)

		i = j
	}

	// Report the output file name back to the coordinator.
	ofile.Close() // close the output file before reporting it
	args := ReduceTaskFinishArgs{}
	args.Task = curTask
	args.OutPutFileName = outPutFileName
	args.WorkerUid = uid
	// Bug fix: pass a fresh ReduceTaskFinishReply instead of reusing the
	// AskForNewTaskReply, which has the wrong type for this RPC.
	finishReply := ReduceTaskFinishReply{}
	ok := call("Coordinator.ReduceTaskFinish", &args, &finishReply)
	if ok {
		fmt.Printf("output file %v finished\n", outPutFileName)
	} else {
		fmt.Printf("call failed!\n")
	}

}

// Read all intermediate key/value pairs this Reduce task is responsible for.
func GetIntermediate(split []string) []KeyValue {
	var intermediate []KeyValue
	for index := range split {
		file, err := os.Open(split[index])
		if err != nil {
			log.Fatalf("cannot open %v", split[index])
		}
		dec := json.NewDecoder(file)
		for {
			var kv KeyValue
			if err := dec.Decode(&kv); err != nil {
				break
			}
			intermediate = append(intermediate, kv)
		}
	}
	sort.Sort(ByKey(intermediate))
	return intermediate
}

// Ask the coordinator for a new task.
func AskForNewTask() *AskForNewTaskReply {
	args := AskForNewTaskArgs{}
	args.WorkerUid = uid

	reply := AskForNewTaskReply{}
	reply.Task = nil

	ok := call("Coordinator.AssignTask", &args, &reply)
	if ok {
		fmt.Printf("%v 请求任务 获得任务 %v \n", uid, reply.Task.Id)
		return &reply
	} else {
		fmt.Printf("%v 请求任务 未被分配 \n", uid)
	}
	return nil
}

// send an RPC request to the coordinator, wait for the response.
// usually returns true.
// returns false if something goes wrong.
func call(rpcname string, args interface{}, reply interface{}) bool {
	// c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234")
	sockname := coordinatorSock()
	c, err := rpc.DialHTTP("unix", sockname)
	if err != nil {
		log.Fatal("dialing:", err)
	}
	defer c.Close()

	err = c.Call(rpcname, args, reply)
	if err == nil {
		return true
	}

	fmt.Println(err)
	return false
}
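
The temporary-file-then-rename trick used in Map above is also what the lab hints recommend, via ioutil.TempFile, so that two workers running the same task cannot collide on a file name. A minimal sketch of that variant (WriteThenRename is a hypothetical helper, not part of the lab code):

// WriteThenRename writes data to a uniquely named temporary file in the
// current directory, then atomically renames it to finalName.
func WriteThenRename(finalName string, data []byte) error {
	tmp, err := ioutil.TempFile(".", "mr-tmp-*")
	if err != nil {
		return err
	}
	if _, err := tmp.Write(data); err != nil {
		tmp.Close()
		os.Remove(tmp.Name())
		return err
	}
	if err := tmp.Close(); err != nil {
		os.Remove(tmp.Name())
		return err
	}
	// On the same filesystem os.Rename is atomic: a reader sees either no
	// file or the complete file, never a partial one.
	return os.Rename(tmp.Name(), finalName)
}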
5.rpc.go

Definitions of the argument and reply structs used by the RPCs (remote procedure calls).

package mr

//
// RPC definitions.
//
// remember to capitalize all names.
//

import (
	"os"
	"strconv"
)


// Add your RPC definitions here.

type AskForNewTaskArgs struct {
	WorkerUid int
}

type AskForNewTaskReply struct {
	Task    *Task
	NReduce int
}

type MapTaskFinishArgs struct {
	Task             *Task
	FinishedMidFiles [][]string
	WorkerUid        int
}

type MapTaskFinishReply struct {
}

type ReduceTaskFinishArgs struct {
	Task           *Task
	OutPutFileName string
	WorkerUid      int
}

type ReduceTaskFinishReply struct {
}

// Cook up a unique-ish UNIX-domain socket name
// in /var/tmp, for the coordinator.
// Can't use the current directory since
// Athena AFS doesn't support UNIX-domain sockets.
func coordinatorSock() string {
	s := "/var/tmp/824-mr-"
	s += strconv.Itoa(os.Getuid())
	return s
}

Results

The lab provides a test script that performs the following checks:

  1. For the word-count and indexer MapReduce jobs, whether the system's output matches that of a sequential execution.
  2. Whether the worker processes actually run in parallel.
  3. Whether each Map/Reduce task is executed the expected number of times.
  4. Whether any process exits before the whole MapReduce job completes.
  5. Whether the job still completes when some worker processes crash.

The code above passes all of these tests.
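
For reference, the test script is run from src/main, as described on the lab page:

cd ~/6.824/src/main
bash test-mr.sh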
