6.824——实验一Part I: Map/Reduce input and output

这一部分实验主要就是完成map原始文件(input)到中间文件(intermediateFile),然后reduce中间文件到输出文件(output)的功能,相当于是map函数和reduce函数的具体调用,分别生成中间文件和结果文件的阶段。

1.doMap

// doMap runs a single map task: it reads inFile, passes the text to mapF, and
// writes each returned key/value pair to the intermediate file named by
// reduceName(jobName, mapTask, r), where r = ihash(key) % nReduce.
func doMap(
	jobName string, // the name of the MapReduce job
	mapTask int, // which map task this is
	inFile string, // path of the input file this map task reads
	nReduce int, // the number of reduce tasks that will be run ("R" in the paper)
	mapF func(filename string, contents string) []KeyValue, // user-defined map function
)

这个函数的主要功能是将某一个分块文件,通过用户定义的Map函数(mapF)生成键值对序列,并按 ihash(Key) mod nReduce 将键值对划分(Partition)到不同的分组中(编号 0…nReduce-1),每个分组对应一个中间文件。
下面是完整的函数内容

	contents := ""
	f, _ := os.Open(inFile)
	inputScanner := bufio.NewScanner(f)
	//similar while boolean
	for inputScanner.Scan() {
		contents += inputScanner.Text() + " "
	}

	kvs := mapF(jobName, contents)
	file2kv := make(map[string][]KeyValue)
	for _, kv := range kvs {
		//partition KeyValues into diffrent file by key mod nReduce
		//then ReduceTask r only dispose the r'th intermediate file's KeyValues
		r := ihash(kv.Key) % nReduce
		fileName := reduceName(jobName, mapTask, r)
		file2kv[fileName] = append(file2kv[fileName], kv)
	}
	for file, kvs := range file2kv {
		f, _ := os.Create(file)
		enc := json.NewEncoder(f)
		for _, kv := range kvs {
			enc.Encode(&kv)
		}
		f.Close()
	}

2.doReduce

// doReduce runs a single reduce task: it reads the intermediate file for
// reduceTask from each of the nMap map tasks, groups the values by key, and
// writes one JSON-encoded KeyValue per key, in sorted key order, to outFile.
func doReduce(
	jobName string, // the name of the whole MapReduce job
	reduceTask int, // which reduce task this is
	outFile string, // write the output here
	nMap int, // the number of map tasks that were run ("M" in the paper)
	reduceF func(key string, values []string) string, // user-defined reduce function, called once per distinct key
)

这个函数的主要功能就是收集所有map任务的第reduceTask 个中间文件并按key合并(merge),然后将key排序(sort),再对每个key调用用户定义的reduceF函数进行Reduce,并把结果依次写入到output文件。
下面是完整的函数内容:

	//merge:merge KeyValue pair of the r'th intermediate file for all map task into key2v
	key2v := make(map[string][]string)
	for i := 0; i < nMap; i++ {
		//supposing reduceTask equals r,reduce task r collects the r'th intermediate file from each map task,
		//fileName is the r'th intermediate file's name
		fileName := reduceName(jobName, i, reduceTask)
		f, _ := os.Open(fileName)
		kvs := []KeyValue{}

		dec := json.NewDecoder(f)
		for {
			kv := KeyValue{}
			err := dec.Decode(&kv)
			if err != nil {
				fmt.Println("Decoder failed", err.Error())
				break
			}
			kvs = append(kvs, kv)
		}
		f.Close()
		for _, kv := range kvs {
			key2v[kv.Key] = append(key2v[kv.Key], kv.Value)
		}
	}
	//sort key2v by key
	keys := []string{}
	for k := range key2v {
		keys = append(keys, k)
	}
	sort.Strings(keys)
	//write to outPutfile
	file, _ := os.Create(outFile)
	enc := json.NewEncoder(file)
	for _, k := range keys {
		enc.Encode(KeyValue{k, reduceF(k, key2v[k])})
	}
	file.Close()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值