简介/使用方式
ProcessFileByLine 按行处理文件的框架, 包含文件读写、并发处理、错误与结果记录等功能。
调用者只需要告诉框架如何处理文件中的一行内容,即传入行处理函数
行处理函数分两种
1、 无返回值
type ProcessFileByLineFunc func(string) error
2、 有返回值类型、返回一个结果作为处理后的行输出到结果文件
type ProcessFileByLineFuncAndSave func(string) (res string, err error)
(tips: 当然, 你也可以在无返回值的行处理函数中利用闭包特性自行保存处理结果)
调用方式按函数类型和是否并发处理分为四个入口
1、 ProcessFileByLine
2、 ProcessFileByLineAndSave
3、 ProcessFileByLineParallel
4、 ProcessFileByLineAndSaveParallel
函数入口扩展参数
一些自定义参数、见 FileProcessExt注释
错误、结果、日志
错误信息通过标准库 log 输出 (默认写到 stderr),
出错的行本身会写入 ${srcFile}_err (可用 ErrFileName 修改; 目前仅并发入口在 parallel > 1 时写该文件)
结果会写入 ${srcFile}_res (可用 ResFileName 修改; 内容为 "AndSave" 方式行处理函数的返回值)
框架同时提供了函数NameMap,你可以在项目init函数中将自己函数加进去,然后根据名字获取并使用,实现根据函数配置名处理文件。
源码
package processFileByLine
import (
"bufio"
"io"
"log"
"os"
"strings"
"sync"
"time"
)
type (
	// ProcessFileByLineFunc is a line handler without output: it receives one
	// record of the source file and reports failure through its error.
	ProcessFileByLineFunc func(string) error

	// ProcessFileByLineFuncAndSave is a line handler with output: besides the
	// error it returns res, the text that the "AndSave" entry points write to
	// the result file when err is nil.
	ProcessFileByLineFuncAndSave func(string) (res string, err error)
)

var (
	// ProcessFileByLineFuncNameMap is a registry of ProcessFileByLineFunc
	// handlers keyed by name. It starts with a single entry that maps the
	// empty name to nil.
	ProcessFileByLineFuncNameMap = map[string]ProcessFileByLineFunc{"": nil}

	// ProcessFileByLineFuncAndSaveNameMap is the equivalent registry for
	// ProcessFileByLineFuncAndSave handlers; it also starts with the empty
	// name mapped to nil.
	ProcessFileByLineFuncAndSaveNameMap = map[string]ProcessFileByLineFuncAndSave{"": nil}
)
// FileProcessExt carries the optional settings accepted by every
// ProcessFileByLine* entry point. Zero values are replaced by defaults in
// check before a file is processed.
type FileProcessExt struct {
	// SkipLine is the number of leading records that are read and discarded.
	SkipLine int

	// Limit bounds the line counter: reading stops once it passes
	// SkipLine+Limit. Values <= 0 default to 1000000000.
	Limit int

	// SrcFileLineSeperator is the byte that ends a record in the source file.
	// Defaults to '\n'.
	SrcFileLineSeperator byte

	// SrcFileLineTrim is the cutset stripped from both ends of every record.
	// Defaults to the record separator.
	SrcFileLineTrim string

	// ResFileName is the path of the result file. Defaults to the source file
	// name followed by "_res".
	ResFileName string

	// ResFileLineSeperator is appended to every result written to the result
	// file. Defaults to "\n".
	ResFileLineSeperator string

	// ErrFileName is the path of the file that receives records whose handler
	// failed. Defaults to the source file name followed by "_err".
	ErrFileName string

	// MultiLine is the number of consecutive source records joined into the
	// single record handed to the handler. 0 defaults to 1.
	MultiLine int

	// ShowProcessedNum turns on progress logging with the default interval
	// when ShowProcessedNumV is left at 0.
	ShowProcessedNum bool

	// ShowProcessedNumV is the interval, in lines, used for progress logging.
	// Defaults to 1024 when ShowProcessedNum is set and this field is 0.
	ShowProcessedNumV int

	// LoopReadN is the number of additional passes over the source file made
	// by the parallel entry points when they reach end of file.
	LoopReadN int
}

// check replaces every zero-valued option with its default. srcFile is only
// used to derive the default result and error file names.
func (ext *FileProcessExt) check(srcFile string) {
	const (
		unlimitedLines   = 1000000000
		progressInterval = 1024
	)

	// fillString assigns fallback to *field when the field was left empty.
	fillString := func(field *string, fallback string) {
		if *field == "" {
			*field = fallback
		}
	}

	if ext.Limit < 1 {
		ext.Limit = unlimitedLines
	}
	if ext.SrcFileLineSeperator == 0 {
		ext.SrcFileLineSeperator = '\n'
	}
	// The trim default depends on the separator, so it is filled afterwards.
	fillString(&ext.SrcFileLineTrim, string(ext.SrcFileLineSeperator))
	fillString(&ext.ResFileLineSeperator, "\n")
	fillString(&ext.ResFileName, srcFile+"_res")
	fillString(&ext.ErrFileName, srcFile+"_err")
	if ext.MultiLine == 0 {
		ext.MultiLine = 1
	}
	if ext.ShowProcessedNum && ext.ShowProcessedNumV == 0 {
		ext.ShowProcessedNumV = progressInterval
	}
}
// ProcessFileByLine reads fileSrc sequentially and calls f once for every
// non-empty record.
//
// Records are delimited by ext.SrcFileLineSeperator; ext.MultiLine consecutive
// records are concatenated into one before being trimmed with
// ext.SrcFileLineTrim and handed to f. The first ext.SkipLine records are
// discarded, and reading stops at end of file or once the line counter passes
// ext.SkipLine+ext.Limit. Zero-valued options are replaced by the defaults
// applied in FileProcessExt.check.
//
// An error returned by f is logged together with the line counter and does not
// stop processing. The returned error is non-nil only when the source file
// cannot be opened, when reading fails (or the file ends) while skipping, or
// when a read fails with something other than io.EOF.
func ProcessFileByLine(fileSrc string, f ProcessFileByLineFunc, ext FileProcessExt) (err error) {
	ext.check(fileSrc)

	src, err := os.Open(fileSrc)
	if err != nil {
		log.Println("open src file err:", err)
		return err
	}
	defer src.Close()
	rd := bufio.NewReader(src)

	for i := 1; i <= ext.SkipLine; i++ {
		if _, skipErr := rd.ReadString(ext.SrcFileLineSeperator); skipErr != nil {
			log.Println("skip line err:", skipErr)
			return skipErr
		}
	}

	atEOF := false
	for j := ext.SkipLine + 1; !atEOF && j <= ext.SkipLine+ext.Limit; j++ {
		// Progress logging is off while ShowProcessedNumV is 0. The guard must
		// cover the modulo as well, otherwise the default configuration
		// divides by zero.
		if ext.ShowProcessedNumV > 0 && (ext.MultiLine > 1 || j%ext.ShowProcessedNumV == 0) {
			log.Println(j)
		}

		line, readErr := rd.ReadString(ext.SrcFileLineSeperator)
		// Join the remaining MultiLine-1 records; stop at the first failure so
		// that a later read cannot mask it.
		for i := 0; readErr == nil && i < ext.MultiLine-1; i++ {
			var more string
			more, readErr = rd.ReadString(ext.SrcFileLineSeperator)
			j++
			line += more
		}
		if readErr != nil {
			if readErr != io.EOF {
				log.Println("read src file err:", readErr)
				return readErr
			}
			// A final record without a trailing separator is still processed
			// below; the loop ends afterwards.
			atEOF = true
		}

		line = strings.Trim(line, ext.SrcFileLineTrim)
		if line == "" {
			continue
		}
		if lineErr := f(line); lineErr != nil {
			log.Println(j, "line get err:", lineErr)
		}
	}
	return nil
}
// ProcessFileByLineAndSave reads fileSrc sequentially, calls f once for every
// non-empty record and writes each successful result, followed by
// ext.ResFileLineSeperator, to ext.ResFileName. The result file is created or
// truncated before reading starts.
//
// Records are delimited by ext.SrcFileLineSeperator; ext.MultiLine consecutive
// records are concatenated into one before being trimmed with
// ext.SrcFileLineTrim and handed to f. The first ext.SkipLine records are
// discarded, and reading stops at end of file or once the line counter passes
// ext.SkipLine+ext.Limit. Zero-valued options are replaced by the defaults
// applied in FileProcessExt.check.
//
// An error returned by f is logged with the line counter; that record produces
// no output and processing continues. The returned error is non-nil when
// either file cannot be opened, when reading fails (or the file ends) while
// skipping, when a read fails with something other than io.EOF, or when the
// buffered results cannot be flushed to the result file.
func ProcessFileByLineAndSave(fileSrc string, f ProcessFileByLineFuncAndSave, ext FileProcessExt) (err error) {
	ext.check(fileSrc)

	src, err := os.Open(fileSrc)
	if err != nil {
		log.Println("open src file err:", err)
		return err
	}
	defer src.Close()
	rd := bufio.NewReader(src)

	dst, err := os.OpenFile(ext.ResFileName, os.O_CREATE|os.O_TRUNC|os.O_RDWR, 0666)
	if err != nil {
		log.Println(err)
		return err
	}
	defer dst.Close()
	w := bufio.NewWriter(dst)
	// Registered after dst.Close, so it runs first. A flush failure means
	// results were lost, so it is reported unless an earlier error is already
	// being returned.
	defer func() {
		if flushErr := w.Flush(); flushErr != nil {
			log.Println(flushErr)
			if err == nil {
				err = flushErr
			}
		}
	}()

	for i := 1; i <= ext.SkipLine; i++ {
		if _, skipErr := rd.ReadString(ext.SrcFileLineSeperator); skipErr != nil {
			log.Println("skip line err:", skipErr)
			return skipErr
		}
	}

	atEOF := false
	for j := ext.SkipLine + 1; !atEOF && j <= ext.SkipLine+ext.Limit; j++ {
		// Progress logging is off while ShowProcessedNumV is 0. The guard must
		// cover the modulo as well, otherwise the default configuration
		// divides by zero.
		if ext.ShowProcessedNumV > 0 && (ext.MultiLine > 1 || j%ext.ShowProcessedNumV == 0) {
			log.Println(j)
		}

		line, readErr := rd.ReadString(ext.SrcFileLineSeperator)
		// Join the remaining MultiLine-1 records; stop at the first failure so
		// that a later read cannot mask it.
		for i := 0; readErr == nil && i < ext.MultiLine-1; i++ {
			var more string
			more, readErr = rd.ReadString(ext.SrcFileLineSeperator)
			j++
			line += more
		}
		if readErr != nil {
			if readErr != io.EOF {
				log.Println("read src file err:", readErr)
				return readErr
			}
			// A final record without a trailing separator is still processed
			// below; the loop ends afterwards.
			atEOF = true
		}

		line = strings.Trim(line, ext.SrcFileLineTrim)
		if line == "" {
			continue
		}
		res, lineErr := f(line)
		if lineErr != nil {
			log.Println(j, "line get err:", lineErr)
			continue
		}
		if _, writeErr := w.WriteString(res + ext.ResFileLineSeperator); writeErr != nil {
			log.Println("writeLineRes", j, res, writeErr)
		}
	}
	// All writes are synchronous and flushed by the deferred function, so no
	// delay is needed before returning.
	return nil
}
// ProcessFileByLineParallel reads fileSrc in one goroutine and calls f on
// every non-empty record from parallel worker goroutines. Records for which f
// returns an error are logged and written, one per line, to ext.ErrFileName.
// With parallel <= 1 the call is delegated to ProcessFileByLine.
//
// Records are delimited by ext.SrcFileLineSeperator; ext.MultiLine consecutive
// records are concatenated into one before being trimmed with
// ext.SrcFileLineTrim. The first ext.SkipLine records are discarded, and
// reading stops at end of file or once the line counter passes
// ext.SkipLine+ext.Limit. When ext.LoopReadN > 0 the reader rewinds to the
// start of the file that many times on reaching end of file. Zero-valued
// options are replaced by the defaults applied in FileProcessExt.check.
//
// The function returns only after all workers have finished and the error
// file has been flushed. It returns the reader's error if there is one,
// otherwise the first error hit while writing the error file, otherwise nil.
// Errors returned by f never abort processing.
func ProcessFileByLineParallel(fileSrc string, f ProcessFileByLineFunc, parallel int, ext FileProcessExt) (err error) {
	if parallel <= 1 {
		return ProcessFileByLine(fileSrc, f, ext)
	}
	ext.check(fileSrc)

	srcCh := make(chan string, parallel)
	errCh := make(chan string, parallel)

	// produce feeds the trimmed, non-empty records of fileSrc into srcCh.
	produce := func() error {
		src, openErr := os.Open(fileSrc)
		if openErr != nil {
			log.Println("open src file err:", openErr)
			return openErr
		}
		defer src.Close()
		rd := bufio.NewReader(src)

		for i := 1; i <= ext.SkipLine; i++ {
			if _, skipErr := rd.ReadString(ext.SrcFileLineSeperator); skipErr != nil {
				log.Println("skip line err:", skipErr)
				return skipErr
			}
		}

		rewindsLeft := ext.LoopReadN
		atEOF := false
		for j := ext.SkipLine + 1; !atEOF && j <= ext.SkipLine+ext.Limit; j++ {
			// Progress logging is off while ShowProcessedNumV is 0. The guard
			// must cover the modulo as well, otherwise the default
			// configuration divides by zero and crashes the process.
			if ext.ShowProcessedNumV > 0 && (ext.MultiLine > 1 || j%ext.ShowProcessedNumV == 0) {
				log.Println(j)
			}

			line, readErr := rd.ReadString(ext.SrcFileLineSeperator)
			// Join the remaining MultiLine-1 records; stop at the first
			// failure so that a later read cannot mask it.
			for i := 0; readErr == nil && i < ext.MultiLine-1; i++ {
				var more string
				more, readErr = rd.ReadString(ext.SrcFileLineSeperator)
				j++
				line += more
			}
			if readErr != nil {
				if readErr != io.EOF {
					log.Println("read src file err:", readErr)
					return readErr
				}
				if rewindsLeft > 0 {
					// Start another pass over the file from the beginning.
					rewindsLeft--
					if _, seekErr := src.Seek(0, io.SeekStart); seekErr != nil {
						log.Println("read src file err:", seekErr)
						return seekErr
					}
					rd.Reset(src)
				} else {
					atEOF = true
				}
			}

			line = strings.Trim(line, ext.SrcFileLineTrim)
			if line == "" {
				continue
			}
			srcCh <- line
		}
		return nil
	}

	// saveFailed writes every record received on errCh to the error file and
	// returns the first error encountered while doing so.
	saveFailed := func() error {
		out, openErr := os.OpenFile(ext.ErrFileName, os.O_CREATE|os.O_TRUNC|os.O_RDWR, 0666)
		if openErr != nil {
			log.Println(openErr)
			// Keep draining so that workers never block on errCh.
			for range errCh {
			}
			return openErr
		}
		defer out.Close()
		w := bufio.NewWriter(out)

		var firstErr error
		for item := range errCh {
			if _, writeErr := w.WriteString(item + "\n"); writeErr != nil {
				log.Println(writeErr)
				if firstErr == nil {
					firstErr = writeErr
				}
			}
		}
		if flushErr := w.Flush(); flushErr != nil && firstErr == nil {
			firstErr = flushErr
		}
		return firstErr
	}

	// Each goroutine owns its own error variable. They are read only after
	// the goroutine is known to have finished, so there is no data race.
	var produceErr, saveErr error

	go func() {
		defer close(srcCh)
		if produceErr = produce(); produceErr != nil {
			log.Println("itemProducerRun err: ", produceErr)
		}
	}()

	saverDone := make(chan struct{})
	go func() {
		defer close(saverDone)
		if saveErr = saveFailed(); saveErr != nil {
			log.Println("errItemSaverRun err: ", saveErr)
		}
	}()

	var workers sync.WaitGroup
	workers.Add(parallel)
	for i := 0; i < parallel; i++ {
		go func() {
			defer workers.Done()
			for item := range srcCh {
				if lineErr := f(item); lineErr != nil {
					log.Println(lineErr, item)
					errCh <- item
				}
			}
		}()
	}

	// Workers stop once the producer has closed srcCh. Only then can errCh be
	// closed, and the saver is awaited explicitly instead of sleeping.
	workers.Wait()
	close(errCh)
	<-saverDone

	if produceErr != nil {
		return produceErr
	}
	return saveErr
}
// ProcessFileByLineAndSaveParallel reads fileSrc in one goroutine and calls f
// on every non-empty record from parallel worker goroutines. Each successful
// result is written, followed by ext.ResFileLineSeperator, to ext.ResFileName;
// records for which f returns an error are logged and written, one per line,
// to ext.ErrFileName. Output order follows completion order, not input order.
// With parallel <= 1 the call is delegated to ProcessFileByLineAndSave.
//
// Records are delimited by ext.SrcFileLineSeperator; ext.MultiLine consecutive
// records are concatenated into one before being trimmed with
// ext.SrcFileLineTrim. The first ext.SkipLine records are discarded, and
// reading stops at end of file or once the line counter passes
// ext.SkipLine+ext.Limit. When ext.LoopReadN > 0 the reader rewinds to the
// start of the file that many times on reaching end of file. Zero-valued
// options are replaced by the defaults applied in FileProcessExt.check.
//
// The function returns only after all workers have finished and both output
// files have been flushed. It returns the reader's error if there is one,
// otherwise the first error hit while writing the result file, otherwise the
// first error hit while writing the error file, otherwise nil. Errors returned
// by f never abort processing.
func ProcessFileByLineAndSaveParallel(fileSrc string, f ProcessFileByLineFuncAndSave, parallel int, ext FileProcessExt) (err error) {
	if parallel <= 1 {
		return ProcessFileByLineAndSave(fileSrc, f, ext)
	}
	ext.check(fileSrc)

	srcCh := make(chan string, parallel)
	errCh := make(chan string, parallel)
	resCh := make(chan string, parallel)

	// produce feeds the trimmed, non-empty records of fileSrc into srcCh.
	produce := func() error {
		src, openErr := os.Open(fileSrc)
		if openErr != nil {
			log.Println("open src file err:", openErr)
			return openErr
		}
		defer src.Close()
		rd := bufio.NewReader(src)

		for i := 1; i <= ext.SkipLine; i++ {
			if _, skipErr := rd.ReadString(ext.SrcFileLineSeperator); skipErr != nil {
				log.Println("skip line err:", skipErr)
				return skipErr
			}
		}

		rewindsLeft := ext.LoopReadN
		atEOF := false
		for j := ext.SkipLine + 1; !atEOF && j <= ext.SkipLine+ext.Limit; j++ {
			// Progress logging is off while ShowProcessedNumV is 0. The guard
			// must cover the modulo as well, otherwise the default
			// configuration divides by zero and crashes the process.
			if ext.ShowProcessedNumV > 0 && (ext.MultiLine > 1 || j%ext.ShowProcessedNumV == 0) {
				log.Println(j)
			}

			line, readErr := rd.ReadString(ext.SrcFileLineSeperator)
			// Join the remaining MultiLine-1 records; stop at the first
			// failure so that a later read cannot mask it.
			for i := 0; readErr == nil && i < ext.MultiLine-1; i++ {
				var more string
				more, readErr = rd.ReadString(ext.SrcFileLineSeperator)
				j++
				line += more
			}
			if readErr != nil {
				if readErr != io.EOF {
					log.Println("read src file err:", readErr)
					return readErr
				}
				if rewindsLeft > 0 {
					// Start another pass over the file from the beginning.
					rewindsLeft--
					if _, seekErr := src.Seek(0, io.SeekStart); seekErr != nil {
						log.Println("read src file err:", seekErr)
						return seekErr
					}
					rd.Reset(src)
				} else {
					atEOF = true
				}
			}

			line = strings.Trim(line, ext.SrcFileLineTrim)
			if line == "" {
				continue
			}
			srcCh <- line
		}
		return nil
	}

	// saveItems writes every item received on ch, followed by sep, to the file
	// at path and returns the first error encountered while doing so.
	saveItems := func(path, sep string, ch <-chan string) error {
		out, openErr := os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_RDWR, 0666)
		if openErr != nil {
			log.Println(openErr)
			// Keep draining so that workers never block on ch.
			for range ch {
			}
			return openErr
		}
		defer out.Close()
		w := bufio.NewWriter(out)

		var firstErr error
		for item := range ch {
			if _, writeErr := w.WriteString(item + sep); writeErr != nil {
				log.Println(writeErr)
				if firstErr == nil {
					firstErr = writeErr
				}
			}
		}
		if flushErr := w.Flush(); flushErr != nil && firstErr == nil {
			firstErr = flushErr
		}
		return firstErr
	}

	// Each goroutine owns its own error variable. They are read only after
	// the goroutine is known to have finished, so there is no data race.
	var produceErr, errFileErr, resFileErr error

	go func() {
		defer close(srcCh)
		if produceErr = produce(); produceErr != nil {
			log.Println("itemProducerRun err: ", produceErr)
		}
	}()

	var savers sync.WaitGroup
	savers.Add(2)
	go func() {
		defer savers.Done()
		if errFileErr = saveItems(ext.ErrFileName, "\n", errCh); errFileErr != nil {
			log.Println("errItemSaverRun err: ", errFileErr)
		}
	}()
	go func() {
		defer savers.Done()
		if resFileErr = saveItems(ext.ResFileName, ext.ResFileLineSeperator, resCh); resFileErr != nil {
			log.Println("errResItemSaverRun err: ", resFileErr)
		}
	}()

	var workers sync.WaitGroup
	workers.Add(parallel)
	for i := 0; i < parallel; i++ {
		go func() {
			defer workers.Done()
			for item := range srcCh {
				lineRes, lineErr := f(item)
				if lineErr != nil {
					log.Println(lineErr, item)
					errCh <- item
					continue
				}
				resCh <- lineRes
			}
		}()
	}

	// Workers stop once the producer has closed srcCh. Only then can the
	// output channels be closed, and both savers are awaited explicitly.
	workers.Wait()
	close(errCh)
	close(resCh)
	savers.Wait()

	// NOTE(review): this fixed delay is no longer needed for correctness now
	// that both savers are awaited above. It is kept so that the call's timing
	// and this file's use of package time stay as they were; drop it together
	// with the "time" import once nothing else in the file needs that import.
	time.Sleep(1 * time.Second)

	switch {
	case produceErr != nil:
		return produceErr
	case resFileErr != nil:
		return resFileErr
	default:
		return errFileErr
	}
}