文件分片读取

将多个文件按顺序视为一个连续的行序列,按行数尽量平均地分成 m 个分片(余数归入最后一个分片,分片边界精确到行),并支持按下标随机访问、读取任意一个分片。

package utils

import (
	"bufio"
	"io"
	"log"
	"os"
	"strings"
)

// SplitableFile treats an ordered list of files as one sequence of lines
// divided into m consecutive partitions. Partition boundaries are accurate to
// one line: a partition always starts and ends between lines.
//
// @author delicious
type SplitableFile struct {
	// m is the number of partitions.
	m     int
	// files holds the paths of the underlying files, in reading order.
	files []string

	// Per-partition layout; index i describes partition i.
	offsetF []int // index into files of the file in which the partition begins
	offsetL []int // number of lines of that file that precede the partition
	lines   []int // number of lines the partition contains
}

// FromFiles returns a SplitableFile over files that is divided into m
// partitions. It allocates the three per-partition tables with length m and
// then runs init to fill them in before returning the value.
func FromFiles(files []string, m int) SplitableFile {
	var sf SplitableFile

	sf.files = files
	sf.m = m

	// One slot per partition in each layout table.
	sf.offsetF = make([]int, m)
	sf.offsetL = make([]int, m)
	sf.lines = make([]int, m)

	sf.init()
	return sf
}

// init counts the lines of every file and lays out the m partitions.
//
// Each partition is assigned total/m lines; the last partition additionally
// receives the remainder. For every partition the method records the index of
// the file it begins in (offsetF) and how many lines of that file precede it
// (offsetL).
//
// If m is not positive there is nothing to lay out and the method returns
// without touching the tables. When all input is used up before every
// partition has been placed (which can only happen when the total line count
// is zero), the remaining, empty partitions are positioned at the end of the
// last file instead of indexing past the file list.
//
// A file that cannot be opened or read terminates the program through
// log.Fatalln.
func (sf *SplitableFile) init() {
	m := sf.m
	if m <= 0 {
		return
	}

	// Line count of every file, plus the grand total.
	counts := make([]int, len(sf.files))
	total := 0
	for fid := range sf.files {
		counts[fid] = sf.countLines(fid)
		total += counts[fid]
	}

	// Even share for everyone; the last partition absorbs the remainder.
	share := total / m
	for i := range sf.lines {
		sf.lines[i] = share
	}
	sf.lines[m-1] = total - (m-1)*share

	// Walk a (file, line) cursor through the input, handing out lines.
	last := len(counts) - 1
	fid, lid := 0, 0
	for i, need := range sf.lines {
		sf.offsetF[i] = fid
		sf.offsetL[i] = lid

		for fid <= last {
			remaining := counts[fid] - lid
			if need < remaining {
				// The partition ends inside the current file.
				lid += need
				break
			}

			// The partition uses up the rest of the current file.
			need -= remaining
			if fid == last {
				// Never move the cursor past the last file.
				lid = counts[fid]
				break
			}
			fid++
			lid = 0
			if need == 0 {
				break
			}
		}
	}
}

// countLines returns the number of lines in files[fid]. A line longer than
// the reader's buffer is delivered by ReadLine in several fragments and is
// counted once, when its final fragment (isPrefix == false) arrives.
func (sf *SplitableFile) countLines(fid int) int {
	r, f := sf.reader(fid)

	n := 0
	for {
		_, isPrefix, err := r.ReadLine()
		if err == io.EOF {
			break
		}
		if err != nil {
			// Any other error would otherwise repeat forever.
			_ = f.Close()
			log.Fatalln(err)
		}
		if !isPrefix {
			n++
		}
	}

	_ = f.Close()
	return n
}

// ReadSplit returns the content of partition ith as a single string: the
// partition's lines joined by "\n", without a trailing newline. Line endings
// are removed by ReadLine, and empty lines are preserved.
//
// Reading starts in file offsetF[ith] after skipping offsetL[ith] lines and
// continues into the following files until lines[ith] lines have been
// collected. A line that ReadLine delivers in several fragments (isPrefix)
// counts as one line, matching how init counts lines. Reaching the end of a
// file only switches to the next file; it does not count as a line.
//
// An empty partition yields "" without opening any file. If the files run out
// before the partition is complete (presumably because they changed after
// they were indexed), the lines collected so far are returned.
//
// ith must be a valid partition index; an out-of-range value panics. A file
// that cannot be opened or read terminates the program through log.Fatalln.
func (sf *SplitableFile) ReadSplit(ith int) string {
	want := sf.lines[ith]
	if want == 0 {
		return ""
	}

	fid := sf.offsetF[ith]
	r, f := sf.reader(fid)

	// Skip the complete lines that belong to earlier partitions.
	for skipped := 0; skipped < sf.offsetL[ith]; {
		_, isPrefix, err := r.ReadLine()
		if err == io.EOF {
			// File is shorter than expected; the loop below moves on.
			break
		}
		if err != nil {
			_ = f.Close()
			log.Fatalln(err)
		}
		if !isPrefix {
			skipped++
		}
	}

	var builder strings.Builder
	got := 0          // complete lines collected so far
	lineStart := true // whether the next fragment begins a new line
	for got < want {
		chunk, isPrefix, err := r.ReadLine()
		if err == io.EOF {
			// Current file is exhausted: continue with the next one.
			_ = f.Close()
			fid++
			if fid >= len(sf.files) {
				return builder.String()
			}
			r, f = sf.reader(fid)
			continue
		}
		if err != nil {
			_ = f.Close()
			log.Fatalln(err)
		}

		// Separate lines by count rather than by builder length, so
		// that leading empty lines keep their separator.
		if lineStart && got > 0 {
			builder.WriteByte('\n')
		}
		builder.Write(chunk)

		lineStart = !isPrefix
		if lineStart {
			got++
		}
	}

	_ = f.Close()
	return builder.String()
}

// reader opens files[fid] and returns a buffered reader over it together with
// the underlying file, which the caller is responsible for closing. If the
// file cannot be opened the program is terminated through log.Fatalln.
func (sf *SplitableFile) reader(fid int) (*bufio.Reader, *os.File) {
	file, openErr := os.Open(sf.files[fid])
	if openErr != nil {
		log.Fatalln(openErr)
	}

	buffered := bufio.NewReader(file)
	return buffered, file
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值