map篇
在C语言时代,一个map函数可能长成下面这个样子
// mapF applies f to every element of i and returns the results in a newly
// allocated slice that has the same length and capacity as i. The input
// slice itself is not written to.
func mapF(f IntMapFunc, i ...int) []int {
	mapped := make([]int, len(i), cap(i))
	for idx, v := range i {
		mapped[idx] = f(v)
	}
	return mapped
}
// mapFInplace replaces every element of i with f applied to it and returns
// i itself. When called as mapFInplace(f, s...), the caller's backing array
// is therefore modified.
func mapFInplace(f IntMapFunc, i ...int) []int {
	for idx, v := range i {
		i[idx] = f(v)
	}
	return i
}
看起来不错,但这个函数只适用于int,如果是int8,那么golang强类型的坑会让你抓瞎。不得不又复制一份。最终得到的结果是这会让所有的程序设计者如鲠在喉。
如果不考虑效率,你能想到的最优美的写法是什么?
如果只用reflect包,那么会是下面这个样子
import "reflect"
// MapR wraps the function value fi into a functional.Function that maps fi
// over the slice received as its first argument.
//
// The returned function allocates a new slice with the type, length and
// capacity of the input slice, calls fi through reflection for every
// element and stores the first result at the same index. It returns a
// one-element result list holding the new slice.
func MapR(fi interface{}) functional.Function {
	fn := reflect.ValueOf(fi)
	return func(in []reflect.Value) []reflect.Value {
		src := in[0]
		dst := reflect.MakeSlice(src.Type(), src.Len(), src.Cap())
		// Elements are visited from the last index down to 0. in[0] is reused
		// as the argument slot, so after the loop it holds an element rather
		// than the slice.
		for idx := src.Len() - 1; idx >= 0; idx-- {
			in[0] = src.Index(idx)
			results := fn.Call(in)
			dst.Index(idx).Set(results[0])
		}
		return []reflect.Value{dst}
	}
}
// MapRInplace wraps the function value fi into a functional.Function that
// maps fi over the slice received as its first argument, overwriting the
// slice's own elements.
//
// The returned function calls fi through reflection for every element and
// stores the first result back at the same index. It returns the argument
// list with in[0] restored to the (now modified) slice.
func MapRInplace(fi interface{}) functional.Function {
	fn := reflect.ValueOf(fi)
	return func(in []reflect.Value) []reflect.Value {
		target := in[0]
		// Elements are visited from the last index down to 0; in[0] is reused
		// as the argument slot for each call.
		for idx := target.Len() - 1; idx >= 0; idx-- {
			elem := target.Index(idx)
			in[0] = elem
			elem.Set(fn.Call(in)[0])
		}
		in[0] = target
		return in
	}
}
在golang中,一个function的interface{}形式是 `type Function = func(in []reflect.Value) []reflect.Value`。如果一个变量的类型是 `Function`,那么它可以被绑定到一个地址上。我们使用下面的胶水函数,完成这两个函数的全部封装。
func Map(fi, fm interface{}) {
functional.MakeFunc(MapR(fi), fm)
return
}
func MapInplace(fi, fm interface{}) {
functional.MakeFunc(MapRInplace(fi), fm)
return
}
我们对这两个函数做一个测试。
// BenchmarkMap measures the reflection-based copying Map on a slice of
// 100000 ints, feeding each result back in as the next input.
func BenchmarkMap(b *testing.B) {
	var mapped func(i ...int) []int
	increment := func(i int) int { return i + 1 }
	Map(increment, &mapped)

	data := make([]int, 100000)
	for iter := 0; iter < b.N; iter++ {
		data = mapped(data...)
	}
}
func BenchmarkMapInplace(b *testing.B) {
var x func(i ...int) []int
Map(func(i int) int { return i + 1 }, &x)
var s = make([]int, 100000)
for i := 0; i < b.N; i++ {
x(s...)
}
}
// BenchmarkMap100000Raw measures the hand-written copying mapF with add1 on
// a slice of 100000 ints, feeding each result back in as the next input.
func BenchmarkMap100000Raw(b *testing.B) {
	data := make([]int, 100000)
	for iter := 0; iter < b.N; iter++ {
		data = mapF(add1, data...)
	}
}
// BenchmarkMap100000RawInplace measures the hand-written in-place
// mapFInplace with add1 on a slice of 100000 ints.
func BenchmarkMap100000RawInplace(b *testing.B) {
	data := make([]int, 100000)
	for iter := 0; iter < b.N; iter++ {
		data = mapFInplace(add1, data...)
	}
}
=== RUN TestMap
[2 3]
[3 4]
--- PASS: TestMap (0.00s)
BenchmarkMap-12 50 24199896 ns/op
BenchmarkMapInplace-12 50 24840064 ns/op
BenchmarkMap100000Raw-12 5000 319601 ns/op
BenchmarkMap100000RawInplace-12 10000 226000 ns/op
虽然特别优美:对于任意函数 `func(T) T`,都能绑定到一个函数指针 `func(...T) []T` 或者 `func([]T) []T` 上,并且它也实现了功能,但这也太慢了!比非通用的函数要慢约100倍,也就是说这两个函数如果使用到工程中,99%的时间会用在reflect上,这能玩?
注意到runtime-call是一个大量消耗cpu的点。我们使用接口对这种特性进行封装。
// Handler is the adapter interface through which the mapping helpers in this
// file touch a slice one index at a time, without calling the user function
// through reflection.
type Handler interface {
// Call receives the user function f and the index to process. The IntHandler
// implementation in this file type-asserts f to func(int) int and overwrites
// the element at index with the result.
Call(f interface{}, index int)
}
type Mapper struct {
F func(slice interface{}) Handler
FInplace(slice interface{}) Handler
}
// MapperF supplies the two Handler factories that Mapper.MapR and
// Mapper.MapRInplace call for every invocation of the generated function.
type MapperF interface {
// F returns the Handler used by the copying map. MapperTraits.F in this file
// copies the input slice first.
F(slice interface{}) Handler
// FInplace returns the Handler used by the in-place map. MapperTraits.FInplace
// in this file converts the input slice itself, without copying.
FInplace(slice interface{}) Handler
}
// Mapper builds map functions from an embedded MapperF and a worker count.
type Mapper struct {
// MapperF provides F and FInplace; calling them panics while it is nil.
MapperF
// CoreCount is passed to MapSlice as the number of chunks to run
// concurrently. MapR and MapRInplace treat 0 as 1.
CoreCount int
}
// MapR wraps fi into a functional.Function that maps fi over the slice
// received as its first argument, using the Handler produced by m.F.
//
// The returned function asks m.F for a Handler over the input slice, invokes
// Handler.Call once per index (from the last index down to 0) and returns a
// one-element result list holding the Handler converted to a slice of fi's
// first parameter type.
func (m Mapper) MapR(fi interface{}) functional.Function {
	// Slice type the Handler is converted back to, e.g. []int for func(int) int.
	t := reflect.SliceOf(reflect.TypeOf(fi).In(0))
	return func(in []reflect.Value) (out []reflect.Value) {
		slice := in[0]
		sliceMap := m.F(slice.Interface())
		for i := slice.Len() - 1; i >= 0; i-- {
			sliceMap.Call(fi, i)
		}
		// Convert already yields a reflect.Value, so it is appended as is.
		return append(out, reflect.ValueOf(sliceMap).Convert(t))
	}
}
// MapRInplace wraps fi into a functional.Function that maps fi over the
// slice received as its first argument, using the Handler produced by
// m.FInplace.
//
// It is the in-place counterpart of MapR and carries its own name so the two
// methods can coexist on Mapper. The returned function invokes Handler.Call
// once per index (from the last index down to 0) and returns a one-element
// result list holding the Handler converted to a slice of fi's first
// parameter type.
func (m Mapper) MapRInplace(fi interface{}) functional.Function {
	// Slice type the Handler is converted back to, e.g. []int for func(int) int.
	t := reflect.SliceOf(reflect.TypeOf(fi).In(0))
	return func(in []reflect.Value) (out []reflect.Value) {
		slice := in[0]
		sliceMap := m.FInplace(slice.Interface())
		for i := slice.Len() - 1; i >= 0; i-- {
			sliceMap.Call(fi, i)
		}
		// Convert already yields a reflect.Value, so it is appended as is.
		return append(out, reflect.ValueOf(sliceMap).Convert(t))
	}
}
注意到for循环彼此无关,我们再把for/Handler提出来。
import "sync"
// mapSlice calls handler.Call(f, idx) for every idx in the half-open range
// [l, r), in ascending order. It does nothing when l >= r.
func mapSlice(handler Handler, l, r int, f interface{}) {
	idx := l
	for idx < r {
		handler.Call(f, idx)
		idx++
	}
}
// MapSlice invokes handler.Call(f, i) for every i in [l, r) and returns
// handler.
//
// With coreCount <= 1, or an empty range, the indices are processed
// sequentially on the calling goroutine. Otherwise the range is cut into
// contiguous chunks of ceil((r-l)/coreCount) indices, each processed by its
// own goroutine, and MapSlice returns once all of them have finished.
// handler.Call must therefore be safe to call concurrently for distinct
// indices.
func MapSlice(handler Handler, l, r, coreCount int, f interface{}) Handler {
	// Check coreCount before dividing by it, so a zero or negative worker
	// count falls back to the sequential path.
	if coreCount <= 1 {
		mapSlice(handler, l, r, f)
		return handler
	}
	step := (r - l + coreCount - 1) / coreCount
	if step <= 0 {
		mapSlice(handler, l, r, f)
		return handler
	}

	var wg sync.WaitGroup
	for lo := l; lo < r; lo += step {
		hi := lo + step
		if hi > r {
			hi = r
		}
		// Register one unit per chunk actually started. Rounding step up can
		// yield fewer chunks than coreCount (5 indices over 4 workers gives
		// 3 chunks), and Wait would otherwise never return.
		wg.Add(1)
		go func(lo, hi int) {
			defer wg.Done()
			mapSlice(handler, lo, hi, f)
		}(lo, hi)
	}
	wg.Wait()
	return handler
}
对 `Mapper` 略微修改
// MapR wraps fi into a functional.Function that maps fi over the slice
// received as its first argument, using the Handler produced by m.F and the
// chunked driver MapSlice.
//
// A CoreCount of 0 is treated as 1. The returned function replaces in[0]
// with the Handler converted to a slice of fi's first parameter type and
// returns the argument list.
func (m Mapper) MapR(fi interface{}) functional.Function {
	cores := m.CoreCount
	if cores == 0 {
		cores = 1
	}
	sliceType := reflect.SliceOf(reflect.TypeOf(fi).In(0))
	return func(in []reflect.Value) []reflect.Value {
		src := in[0]
		handler := MapSlice(m.F(src.Interface()), 0, src.Len(), cores, fi)
		in[0] = reflect.ValueOf(handler).Convert(sliceType)
		return in
	}
}
// MapRInplace wraps fi into a functional.Function that maps fi over the
// slice received as its first argument, using the Handler produced by
// m.FInplace and the chunked driver MapSlice.
//
// A CoreCount of 0 is treated as 1. The returned function replaces in[0]
// with the Handler converted to a slice of fi's first parameter type and
// returns the argument list.
func (m Mapper) MapRInplace(fi interface{}) functional.Function {
	cores := m.CoreCount
	if cores == 0 {
		cores = 1
	}
	sliceType := reflect.SliceOf(reflect.TypeOf(fi).In(0))
	return func(in []reflect.Value) []reflect.Value {
		src := in[0]
		handler := MapSlice(m.FInplace(src.Interface()), 0, src.Len(), cores, fi)
		in[0] = reflect.ValueOf(handler).Convert(sliceType)
		return in
	}
}
因为Mapper的特性被提出为MapperF,这时如果MapperF没有被初始化,将会导致错误。因此我们再做一个自动反向注入的Mapper类。
// MapperTraits is a Mapper that can serve as its own MapperF: its MapR and
// MapRInplace methods install the receiver as the embedded MapperF when none
// has been set.
type MapperTraits struct {
Mapper
// NOTE(review): presumably the source of GetTypeInfo, which F and FInplace
// use as the concrete Handler slice type — confirm in the functional package.
functional.BaseTraitsInterface
}
// NewMapperTraits returns a MapperTraits whose BaseTraitsInterface is built
// from handler via functional.NewBaseTraits.
//
// Every option of dynamic type int sets CoreCount; when several are given
// the last one wins. Options of any other type are ignored.
func NewMapperTraits(handler interface{}, options ...interface{}) MapperTraits {
	var traits MapperTraits
	traits.BaseTraitsInterface = functional.NewBaseTraits(handler)
	for _, opt := range options {
		if cores, ok := opt.(int); ok {
			traits.CoreCount = cores
		}
	}
	return traits
}
// F returns a Handler backed by a copy of sliceI. It allocates a slice of
// type m.GetTypeInfo() with the same length and capacity as sliceI, copies
// the elements over and asserts the result to Handler.
//
// It panics if the type returned by GetTypeInfo is not a slice type that
// implements Handler.
func (m MapperTraits) F(sliceI interface{}) Handler {
	src := reflect.ValueOf(sliceI)
	handlerType := m.GetTypeInfo()
	dup := reflect.MakeSlice(handlerType, src.Len(), src.Cap())
	reflect.Copy(dup, src)
	return dup.Interface().(Handler)
}
// FInplace returns a Handler for slice itself: the value is converted to the
// type m.GetTypeInfo() without copying its elements and asserted to Handler.
//
// It panics if the conversion is not possible or the converted value does
// not implement Handler.
func (m MapperTraits) FInplace(slice interface{}) Handler {
	converted := reflect.ValueOf(slice).Convert(m.GetTypeInfo())
	return converted.Interface().(Handler)
}
// MapR delegates to the embedded Mapper.MapR. If no MapperF has been set,
// the receiver copy installs itself first so that Mapper can reach F and
// FInplace.
func (m MapperTraits) MapR(fi interface{}) functional.Function {
	if m.MapperF != nil {
		return m.Mapper.MapR(fi)
	}
	m.MapperF = m
	return m.Mapper.MapR(fi)
}
// Map builds the copying mapper for fi with m.MapR and hands it, together
// with fm, to functional.MakeFunc.
//
// NOTE(review): fm is presumably a pointer to a function variable that
// MakeFunc fills in (TestMap passes &z) — confirm against the functional
// package.
func (m MapperTraits) Map(fi, fm interface{}) {
	mapper := m.MapR(fi)
	functional.MakeFunc(mapper, fm)
}
// MapRInplace delegates to the embedded Mapper.MapRInplace. If no MapperF
// has been set, the receiver copy installs itself first so that Mapper can
// reach F and FInplace.
func (m MapperTraits) MapRInplace(fi interface{}) functional.Function {
	if m.MapperF != nil {
		return m.Mapper.MapRInplace(fi)
	}
	m.MapperF = m
	return m.Mapper.MapRInplace(fi)
}
// MapInplace builds the in-place mapper for fi with m.MapRInplace and hands
// it, together with fm, to functional.MakeFunc.
//
// NOTE(review): fm is presumably a pointer to a function variable that
// MakeFunc fills in — confirm against the functional package.
func (m MapperTraits) MapInplace(fi, fm interface{}) {
	mapper := m.MapRInplace(fi)
	functional.MakeFunc(mapper, fm)
}
测试如下
// TestMap binds add1 through MapperTraits twice, first with the default
// worker count and then with 2 workers, and prints the slice after each
// pass. It makes no assertions; the expected output is [2 3] then [3 4].
func TestMap(t *testing.T) {
	var mapped func(i ...int) []int
	values := []int{1, 2}

	single := NewMapperTraits(IntHandler{})
	single.Map(add1, &mapped)
	values = mapped(values...)
	fmt.Println(values)

	dual := NewMapperTraits(IntHandler{}, 2)
	dual.Map(add1, &mapped)
	values = mapped(values...)
	fmt.Println(values)
}
// BenchmarkMapRaw measures the hand-written copying mapF with add1 on a
// slice of factor ints, feeding each result back in as the next input.
func BenchmarkMapRaw(b *testing.B) {
	data := make([]int, factor)
	for iter := 0; iter < b.N; iter++ {
		data = mapF(add1, data...)
	}
	// Debug aid: fmt.Println("|", data[0], "|")
}
// BenchmarkMapRawInplace measures the hand-written in-place mapFInplace with
// add1 on a slice of factor ints.
func BenchmarkMapRawInplace(b *testing.B) {
	data := make([]int, factor)
	for iter := 0; iter < b.N; iter++ {
		data = mapFInplace(add1, data...)
	}
	// Debug aid: fmt.Println("|", data[0], "|")
}
// BenchmarkMapper measures the function generated by IntMapper{}.MapR for an
// increment closure on a slice of factor ints.
func BenchmarkMapper(b *testing.B) {
	var mapped func(i ...int) []int
	increment := func(i int) int { return i + 1 }
	functional.MakeFunc(IntMapper{}.MapR(increment), &mapped)

	data := make([]int, factor)
	for iter := 0; iter < b.N; iter++ {
		data = mapped(data...)
	}
	// Debug aid: fmt.Println("|", data[0], "|")
}
// BenchmarkMapperTraits measures the copying mapper generated by a
// MapperTraits with the default worker count on a slice of factor ints.
func BenchmarkMapperTraits(b *testing.B) {
	var mapped func(i ...int) []int
	increment := func(i int) int { return i + 1 }
	functional.MakeFunc(NewMapperTraits(IntHandler{}).MapR(increment), &mapped)

	data := make([]int, factor)
	for iter := 0; iter < b.N; iter++ {
		data = mapped(data...)
	}
	// Debug aid: fmt.Println("|", data[0], "|")
}
// BenchmarkMapperInplaceTraits measures the in-place mapper generated by a
// MapperTraits with the default worker count on a slice of factor ints.
func BenchmarkMapperInplaceTraits(b *testing.B) {
	var mapped func(i ...int) []int
	increment := func(i int) int { return i + 1 }
	functional.MakeFunc(NewMapperTraits(IntHandler{}).MapRInplace(increment), &mapped)

	data := make([]int, factor)
	for iter := 0; iter < b.N; iter++ {
		data = mapped(data...)
	}
	// Debug aid: fmt.Println("|", data[0], "|")
}
// BenchmarkMapperTraits8 measures the copying mapper generated by a
// MapperTraits configured with 8 workers on a slice of factor ints.
func BenchmarkMapperTraits8(b *testing.B) {
	var mapped func(i ...int) []int
	increment := func(i int) int { return i + 1 }
	functional.MakeFunc(NewMapperTraits(IntHandler{}, 8).MapR(increment), &mapped)

	data := make([]int, factor)
	for iter := 0; iter < b.N; iter++ {
		data = mapped(data...)
	}
	// Debug aid: fmt.Println("|", data[0], "|")
}
// BenchmarkMapperInplaceTraits4 measures the in-place mapper generated by a
// MapperTraits configured with 4 workers on a slice of factor ints.
func BenchmarkMapperInplaceTraits4(b *testing.B) {
	var mapped func(i ...int) []int
	increment := func(i int) int { return i + 1 }
	functional.MakeFunc(NewMapperTraits(IntHandler{}, 4).MapRInplace(increment), &mapped)

	data := make([]int, factor)
	for iter := 0; iter < b.N; iter++ {
		data = mapped(data...)
	}
	// Debug aid: fmt.Println("|", data[0], "|")
}
// BenchmarkMapperInplaceTraits8 measures the in-place mapper generated by a
// MapperTraits configured with 8 workers on a slice of factor ints.
func BenchmarkMapperInplaceTraits8(b *testing.B) {
	var mapped func(i ...int) []int
	increment := func(i int) int { return i + 1 }
	functional.MakeFunc(NewMapperTraits(IntHandler{}, 8).MapRInplace(increment), &mapped)

	data := make([]int, factor)
	for iter := 0; iter < b.N; iter++ {
		data = mapped(data...)
	}
	// Debug aid: fmt.Println("|", data[0], "|")
}
// BenchmarkMapRaw4 measures MapSlice called directly on an IntHandler with 4
// workers, bypassing the reflection-generated wrapper.
func BenchmarkMapRaw4(b *testing.B) {
	data := make([]int, factor)
	for iter := 0; iter < b.N; iter++ {
		handler := MapSlice(IntHandler(data), 0, factor, 4, func(a int) int { return a + 1 })
		data = handler.(IntHandler)
	}
	// Debug aid: fmt.Println("|", data[0], "|")
}
// BenchmarkMapRaw8 measures MapSlice called directly on an IntHandler with 8
// workers, bypassing the reflection-generated wrapper.
func BenchmarkMapRaw8(b *testing.B) {
	data := make([]int, factor)
	for iter := 0; iter < b.N; iter++ {
		handler := MapSlice(IntHandler(data), 0, factor, 8, func(a int) int { return a + 1 })
		data = handler.(IntHandler)
	}
	// Debug aid: fmt.Println("|", data[0], "|")
}
=== RUN TestMap
[2 3]
[3 4]
--- PASS: TestMap (0.00s)
goos: windows
goarch: amd64
pkg: github.com/Myriad-Dreamin/functional-go/mr
// 100000000
BenchmarkMapRaw-12 3 371668433 ns/op
BenchmarkMapRawInplace-12 5 240799760 ns/op
BenchmarkMapper-12 2 628998950 ns/op
BenchmarkMapperTraits-12 2 623999250 ns/op
BenchmarkMapperInplaceTraits-12 2 513511500 ns/op
BenchmarkMapperTraits8-12 5 281000660 ns/op
BenchmarkMapperInplaceTraits4-12 10 158599680 ns/op
BenchmarkMapperInplaceTraits8-12 10 126300010 ns/op
BenchmarkMapRaw4-12 10 147497270 ns/op
BenchmarkMapRaw8-12 10 126496710 ns/op
// 100000
BenchmarkMap-12 50 24939844 ns/op
BenchmarkMapInplace-12 50 24620058 ns/op
测试结果是非原地Mapper(8核)的耗时约为0.28s/1e8次运算,与不使用此方法的Map几乎相同。不错,这样的map函数应该已经恰到好处了。
Conclusion
最终我们得到了什么。
如果对效率需求不高,那么可以使用 `Map` 直接生成Map函数。如果对效率的需求略高,只需要写一个Slice的配接器。在此例中,为:
// IntHandler adapts a []int so that its elements can be transformed one
// index at a time.
type IntHandler []int

// Call replaces the element at index with f applied to it. f must be a
// func(int) int; any other dynamic type makes the type assertion panic.
func (h IntHandler) Call(f interface{}, index int) {
	apply := f.(func(int) int)
	h[index] = apply(h[index])
}
如果你对效率的需求真的特别特别高,那么大可重新干回老本行,复制粘贴文本替换。。
// r returns a newly allocated slice, with the same length and capacity as i,
// in which every element is the corresponding element of i plus one. The
// input is not modified.
func r(i ...int) []int {
	out := make([]int, len(i), cap(i))
	copy(out, i)
	for idx := range out {
		out[idx]++
	}
	return out
}
// rInplace adds one to every element of i and returns i itself. When called
// as rInplace(s...), the caller's backing array is modified.
func rInplace(i ...int) []int {
	for idx := range i {
		i[idx]++
	}
	return i
}
对于这个耦合度极高的Map函数而言,效率如下:
BenchmarkMapRaw-12 10 166700110 ns/op
BenchmarkMapRawInplace-12 20 85599920 ns/op
换来的只不过是要复制粘贴很多次,失去了封装的好处!
顺便说一下,在同等情况下c语言的运算力为0.6s/1e8次。果然是时代变了,大人。
Reduce篇
留作课后习题
Reference
所有的代码见github