[6][lab] lab1: map reduce

lab1 map reduce

对比了一下2020和之前的lab1,代码结构有一些变化,2018作业代码侧重于实现map reduce以及schedule部分,其他部分由框架给出,2020则是给出了map reduce部分,并将mr和app利用plugin分开表达,同时需要lab开发整个mr框架部分,借鉴已完成的2018可以顺利完成该部分实验。

  • master,由启动+wait,变更为启动+轮询,worker无变化
  • 整体框架,master维护了作业信息,输入的文件总数,整体作业进度信息,完成后完成清理和返回
  • worker通过rpc注册自己的存在,然后按照master发布的task执行任务,交互通过共享文件系统实现

net/rpc

提供rpc功能,可以通过本地unix socket或者tcp socket,TODO 思考如何基于net基本功能实现一个rpc库

  • server指定类型,以及方法,都是可导出的
  • server依次通过Register注册服务、HandleHTTP、Listen、Serve来开启rpc服务
  • client可以选择同步call或者异步Go方法来执行rpc
package server

import "errors"

// Args carries the two integer operands of an Arith call.
type Args struct {
	A int
	B int
}

// Quotient is the reply filled in by Arith.Divide: the integer quotient
// and the remainder of the division.
type Quotient struct {
	Quo int
	Rem int
}

// Arith is the receiver type whose exported methods perform the arithmetic.
type Arith int

// Multiply stores args.A * args.B in *reply. It always returns nil.
func (t *Arith) Multiply(args *Args, reply *int) error {
	product := args.A * args.B
	*reply = product
	return nil
}

// Divide stores the quotient and remainder of args.A divided by args.B in
// quo. When args.B is zero it returns an error and leaves quo untouched.
func (t *Arith) Divide(args *Args, quo *Quotient) error {
	dividend, divisor := args.A, args.B
	if divisor == 0 {
		return errors.New("divide by zero")
	}
	quo.Quo, quo.Rem = dividend/divisor, dividend%divisor
	return nil
}

// Create the receiver whose exported methods will be offered over RPC.
arith := new(Arith)
// Register the receiver with the rpc package.
// NOTE(review): the value returned by rpc.Register is discarded here — confirm that is intended.
rpc.Register(arith)
// Make the RPC service reachable through HTTP.
rpc.HandleHTTP()
// Listen for TCP connections on port 1234.
l, e := net.Listen("tcp", ":1234")
if e != nil {
	log.Fatal("listen error:", e)
}
// Serve incoming connections in a separate goroutine so this code path does not block.
go http.Serve(l, nil)



// client

// Connect to the RPC server over HTTP on port 1234.
client, err := rpc.DialHTTP("tcp", serverAddress+":1234")
if err != nil {
	log.Fatal("dialing:", err)
}

// Synchronous call: Call returns only after the reply has been filled in.
args := &server.Args{A: 7, B: 8}
var reply int
err = client.Call("Arith.Multiply", args, &reply)
if err != nil {
	log.Fatal("arith error:", err)
}
fmt.Printf("Arith: %d*%d=%d", args.A, args.B, reply)

// Asynchronous call: Go returns immediately and completion is signalled on
// the Done channel. Quotient is declared in package server, just like Args,
// so it must be qualified here as well.
quotient := new(server.Quotient)
divCall := client.Go("Arith.Divide", args, quotient, nil)
replyCall := <-divCall.Done // will be equal to divCall
if replyCall.Error != nil {
	log.Fatal("arith error:", replyCall.Error)
}
// quotient now holds the result; print it, etc.

encoding/gob

类似protobuf,是用来序列化内存数据的库,比如在rpc中用来序列化和反序列化网络请求,TODO 思考如何基于go基础设施实现一个序列化库,序列化库需要考虑什么?

  • 自描述
  • 指针会传递其指向的内容
  • 变长encoding
  • 只会encode结构体导出的成员
  • 内存不足时会申请内存
  • 支持基本类型,结构,slice,不支持channel,functions,会当做未导出成员忽略
struct { A, B int }

//can be sent from or received into any of these Go types:
struct { A, B int }	// the same
*struct { A, B int }	// extra indirection of the struct
struct { *A, **B int }	// extra indirection of the fields
struct { A, B int64 }	// different concrete value type; see below

//It may also be received into any of these:
struct { A, B int }	// the same
struct { B, A int }	// ordering doesn't matter; matching is by name
struct { A, B, C int }	// extra field (C) ignored
struct { B int }	// missing field (A) ignored; data will be dropped
struct { B, C int }	// missing field (A) ignored; extra field (C) ignored.

//Attempting to receive into these types will draw a decode error:
struct { A int; B uint }	// change of signedness for B
struct { A int; B float }	// change of type for B
struct { }			// no field names in common
struct { C, D int }		// no field names in common
package main

import (
    "bytes"
    "encoding/gob"
    "fmt"
    "log"
)

// P is the value type handed to the encoder in this example: three
// integers and a name.
type P struct {
    X    int
    Y    int
    Z    int
    Name string
}

// Q is the type the decoder fills in this example. Compared with P it has
// no Z field and holds X and Y as pointers to int32.
type Q struct {
    X    *int32
    Y    *int32
    Name string
}

// main demonstrates basic gob usage: values of type P are written by an
// encoder, then read back by a decoder into values of type Q and printed.
func main() {
    // A bytes.Buffer stands in for a network connection. Normally the
    // encoder and the decoder would be bound to a connection and run in
    // different processes.
    var network bytes.Buffer
    enc := gob.NewEncoder(&network) // writes to network
    dec := gob.NewDecoder(&network) // reads from network

    // Encode (send) both values before anything is decoded.
    outgoing := []P{
        {3, 4, 5, "Pythagoras"},
        {1782, 1841, 1922, "Treehouse"},
    }
    for _, p := range outgoing {
        if err := enc.Encode(p); err != nil {
            log.Fatal("encode error:", err)
        }
    }

    // Decode (receive) each value into the same Q and print it. Each
    // decode step carries its own error label.
    var q Q
    for _, label := range []string{"decode error 1:", "decode error 2:"} {
        if err := dec.Decode(&q); err != nil {
            log.Fatal(label, err)
        }
        fmt.Printf("%q: {%d, %d}\n", q.Name, *q.X, *q.Y)
    }
}

package main

import (
    "bytes"
    "encoding/gob"
    "fmt"
    "log"
)

// Vector has only unexported fields, which the gob package cannot access.
// It therefore provides a MarshalBinary/UnmarshalBinary method pair (the
// BinaryMarshaler and BinaryUnmarshaler interfaces of the "encoding"
// package) so that it can be sent and received with gob. Implementing gob's
// own GobEncoder/GobDecoder interfaces would work equally well.
type Vector struct {
    x int
    y int
    z int
}

// MarshalBinary encodes v as a single line of plain text: the three
// components separated by spaces and terminated by a newline.
func (v Vector) MarshalBinary() ([]byte, error) {
    line := fmt.Sprintln(v.x, v.y, v.z)
    return []byte(line), nil
}

// UnmarshalBinary parses the plain-text form written by MarshalBinary and
// stores the three components in v. It needs a pointer receiver because it
// modifies the receiver.
func (v *Vector) UnmarshalBinary(data []byte) error {
    src := bytes.NewBuffer(data)
    if _, err := fmt.Fscanln(src, &v.x, &v.y, &v.z); err != nil {
        return err
    }
    return nil
}

// main sends a Vector, a type that supplies its own encoding and decoding
// methods, through gob and prints the value that arrives.
func main() {
    var network bytes.Buffer // stands in for the network

    // Sending side: create an encoder and transmit one value.
    if err := gob.NewEncoder(&network).Encode(Vector{3, 4, 5}); err != nil {
        log.Fatal("encode:", err)
    }

    // Receiving side: create a decoder and read the value back.
    var received Vector
    if err := gob.NewDecoder(&network).Decode(&received); err != nil {
        log.Fatal("decode:", err)
    }
    fmt.Println(received)
}

package main

import (
    "bytes"
    "encoding/gob"
    "fmt"
    "log"
    "math"
)

// Point is a pair of integer coordinates.
type Point struct {
    X int
    Y int
}

// Hypotenuse returns math.Hypot of the two coordinates, i.e. the length of
// the hypotenuse of a right triangle whose legs are X and Y.
func (p Point) Hypotenuse() float64 {
    legX, legY := float64(p.X), float64(p.Y)
    return math.Hypot(legX, legY)
}

// Pythagoras is satisfied by any type that has a Hypotenuse method
// returning a float64.
type Pythagoras interface {
    Hypotenuse() float64
}

// main shows how to send interface values through gob. Unlike regular
// types, the concrete type that implements the interface has to be
// registered first.
func main() {
    var network bytes.Buffer // stands in for the network

    // Register the concrete type for both the encoder and the decoder (the
    // decoder would normally live on a different machine). On each end this
    // tells the engine which concrete type implementing the interface is
    // being sent.
    gob.Register(Point{})

    // Sending side: transmit three Points as interface values.
    enc := gob.NewEncoder(&network)
    for _, scale := range []int{1, 2, 3} {
        interfaceEncode(enc, Point{3 * scale, 4 * scale})
    }

    // Receiving side: read three interface values back and use them.
    dec := gob.NewDecoder(&network)
    for received := 0; received < 3; received++ {
        shape := interfaceDecode(dec)
        fmt.Println(shape.Hypotenuse())
    }
}

// interfaceEncode writes p to enc as a value of interface type and
// terminates the program via log.Fatal if encoding fails.
func interfaceEncode(enc *gob.Encoder, p Pythagoras) {
    // Encoding fails unless the concrete type behind p has been registered;
    // the caller takes care of that.
    //
    // A pointer to the interface is passed so that Encode sees (and hence
    // sends) a value of interface type. Passing p directly would make it see
    // the concrete type instead. See the blog post "The Laws of Reflection"
    // for background.
    if err := enc.Encode(&p); err != nil {
        log.Fatal("encode:", err)
    }
}

// interfaceDecode reads the next interface value from dec and returns it.
// It terminates the program via log.Fatal if decoding fails.
func interfaceDecode(dec *gob.Decoder) Pythagoras {
    // Decoding fails unless the concrete type on the wire has been
    // registered; the caller takes care of that.
    var decoded Pythagoras
    if err := dec.Decode(&decoded); err != nil {
        log.Fatal("decode:", err)
    }
    return decoded
}

sync/Cond

// Take the condition variable's lock before examining the condition.
c.L.Lock()
// Wait inside a loop: the condition is checked again every time Wait
// returns, and waiting continues until it holds.
for !condition() {
    c.Wait()
}
... make use of condition ...
// Release the lock taken above.
c.L.Unlock()
sync/Map

The Map type is optimized for two common use cases:

  • (1) when the entry for a given key is only ever written once but read many times, as in caches that only grow, or
  • (2) when multiple goroutines read, write, and overwrite entries for disjoint sets of keys. In these two cases, use of a Map may significantly reduce lock contention compared to a Go map paired with a separate Mutex or RWMutex.

sync/Once

// once is shared by all goroutines started below.
var once sync.Once
// onceBody is the function handed to once.Do by every goroutine.
onceBody := func() {
    fmt.Println("Only once")
}
// done receives one value from each goroutine after its once.Do call returns.
done := make(chan bool)
// Start 10 goroutines that all call once.Do with the same function.
for i := 0; i < 10; i++ {
    go func() {
        once.Do(onceBody)
        done <- true
    }()
}
// Receive 10 values, i.e. wait until every goroutine has reported completion.
for i := 0; i < 10; i++ {
    <-done
}

// Output:
// Only once

sync/WaitGroup

// wg counts the outstanding fetches so that the code below can wait for
// all of them.
var wg sync.WaitGroup
var urls = []string{
    "http://www.golang.org/",
    "http://www.google.com/",
    "http://www.somestupidname.com/",
}
for _, url := range urls {
    // Increment the WaitGroup counter.
    wg.Add(1)
    // Launch a goroutine to fetch the URL. The URL is passed as an argument
    // so that each goroutine works on its own copy.
    go func(url string) {
        // Decrement the counter when the goroutine completes.
        defer wg.Done()
        // Fetch the URL. A failed fetch is ignored on purpose (best effort).
        resp, err := http.Get(url)
        if err != nil {
            return
        }
        // Close the body so the underlying connection is not leaked.
        resp.Body.Close()
    }(url)
}
// Wait for all HTTP fetches to complete.
wg.Wait()

实现思路

  • worker注册过程,master使用rpc处理worker的注册,worker使用rpc向master注册,server rpc得到注册结果后,通过条件变量唤醒forwardRegistrations go程,其再利用信道做go程间通信,将信息传递给调度go程,TODO 为何不直接通过信道传递给调度者,猜测是为了防止错误导致rpc go程阻塞,这样rpc本身是不会出问题的
  • master rpc实现,实现注册接口,以及ShutDown供销毁自己使用
  • master setup过程,首先启动rpc server,然后异步开启任务执行run过程,包含schedule和finish,run过程首先分发所有map任务,直到全部完成分发reduce任务,全部完成后可以做merge操作,执行finish操作,包括关闭所有workers(通过rpc调用worker 的shutdown,以及通过rpc关闭master的rpc server),最后利用信道通知done信道,此时main进程Wait在这个信道中返回执行结果
  • master会在分发任务过程中阻塞,直到有worker通过rpc注册到master中可供分发任务
  • master任务的分发,map任务,m个输入文件就有m个map任务,reduce任务由job参数指定,随后将任务参数通过rpc下发到worker,调度机制保证不会给同一个worker同时发送两个任务;为了保证任务分发的并发,使用异步go程完成rpc调用,阻塞等待worker返回,rpc执行失败会将任务重新放入任务信道队列,执行结束也会将worker重新返给worker信道,task信道代表任务队列,registerChan代表可用worker信道,结束任务分发循环,这里依赖成功数目的判断结合select信道,通过success信道里传递成功数量信息,通过default来累加直到任务全部完成,每一次涉及结束包括rpc server关闭,调度任务关闭,代码实现都不是很优雅,需要寻找模式
  • worker进程,rpc实现shutdown、DoTask,主线程RunWorker首先向master注册,随后启动rpc server(TODO这里应该保证先启动server后注册),server的关闭目前还存在问题,待解决,随后根据请求阶段,指定doMap或者doReduce
  • doMap根据reduce数目,输出N个文件,按照key hash分配,1个输入文件,N个输出文件,M个map worker共计MxN个文件
  • doReduce根据reduce id,获取相应的mr-X-Y文件作为输入,N个输入文件,1个输出文件,N个reduce worker共计N个文件,若执行merge可以得到单个文件
  • 序列化反序列化KV到文件,使用了json Encoder和Decoder

reference

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值