// go源码库学习之regexp库
//
// 最新推荐文章于 2023-06-19 13:48:39 发布
// liao__ran
// 最新推荐文章于 2023-06-19 13:48:39 发布
// 阅读量252
// 点赞数
// 分类专栏： go库源码文章标签： golang 学习
// 本文链接：https://blog.csdn.net/liao__ran/article/details/128313722
// 版权
// go库源码专栏收录该内容
// 8 篇文章 2 订阅
// 订阅专栏
package main

import (
	"fmt"
	"regexp"
	"regexp/syntax"
	"strings"
)

// main is the program entry point. Only the regexp/syntax walkthrough is
// run; the regexp walkthrough is left commented out and can be enabled by
// swapping the two calls below.
func main() {
	// regexpMain()
	regexpSyntaxMain()
}

// regexpMain walks through the API of the standard regexp package and prints
// the result of every call to standard output.
//
// All patterns and inputs are fixed literals, so the error branches are not
// expected to fire. They are handled anyway so that a failed compile can never
// lead to a nil *regexp.Regexp being used further down.
func regexpMain() {
	// sample is the text most of the matching calls below are run against.
	const sample = "hello the food foo!"

	// Compile parses a pattern and, on success, returns a Regexp that can be
	// used to match text. Matching is leftmost-first: among the matches that
	// start earliest, the one a backtracking engine would find first wins.
	regexpCompile, err := regexp.Compile("foo.?")
	if err != nil {
		fmt.Println("regexpCompile err: ", err)
		return
	}

	// MustCompile is like Compile but panics when the pattern cannot be parsed.
	regexpMustCompile := regexp.MustCompile("foo(?P<first>.?)")
	fmt.Println(regexpMustCompile.String())

	// CompilePOSIX restricts the syntax to POSIX ERE and switches matching
	// to leftmost-longest.
	regexpCompilePOSIX, err := regexp.CompilePOSIX("(foo.?)")
	if err != nil {
		fmt.Println("regexpCompilePOSIX err: ", err)
		return
	}
	fmt.Println(regexpCompilePOSIX.String())

	// MustCompilePOSIX is like CompilePOSIX but panics on a bad pattern.
	regexpMustCompilePOSIX := regexp.MustCompilePOSIX("foo.?")
	fmt.Println(regexpMustCompilePOSIX.String())

	// String returns the source text the Regexp was compiled from.
	regexpString := regexpCompile.String()
	fmt.Println(regexpString)

	// Copy returns a new Regexp copied from the receiver. It is deprecated
	// and only shown here for completeness of the API tour.
	regexpCopy := regexpCompile.Copy()
	fmt.Println(regexpCopy.String())

	// NumSubexp counts the capturing groups (non-capturing groups excluded).
	regexpNumSubexp := regexpMustCompile.NumSubexp()
	fmt.Println(regexpNumSubexp)

	// SubexpNames lists the group names; index 0 (the whole match) and
	// unnamed groups are reported as empty strings.
	regexpSubexpNames := regexpMustCompile.SubexpNames()
	fmt.Println(regexpSubexpNames)

	// SubexpIndex returns the index of the first group with the given name,
	// or -1 when no group has that name (as is the case for "bob").
	regexpSubexpIndex := regexpMustCompile.SubexpIndex("bob")
	fmt.Println(regexpSubexpIndex)

	// LiteralPrefix returns the literal string every match must begin with,
	// plus a flag telling whether that literal is the entire pattern.
	// For `foo.?` this yields "foo", false.
	regexpLiteralPrefixPrefix, regexpLiteralPrefixcomplete := regexpCompile.LiteralPrefix()
	fmt.Println(regexpLiteralPrefixPrefix, regexpLiteralPrefixcomplete)

	// Method forms: report whether the input contains any match.
	// MatchReader reads from an io.RuneReader.
	regexpMatchReader := regexpCompile.MatchReader(strings.NewReader(sample))
	fmt.Println(regexpMatchReader)

	// MatchString takes a string.
	regexpMatchString := regexpCompile.MatchString(sample)
	fmt.Println(regexpMatchString)

	// Match takes a byte slice.
	regexpMatch := regexpCompile.Match([]byte(sample))
	fmt.Println(regexpMatch)

	// Package-level forms: compile the pattern and match in one call,
	// returning an error when the pattern is invalid.
	regexpMatchReaderBool, regexpMatchReaderErr := regexp.MatchReader("foo.?", strings.NewReader(sample))
	fmt.Println(regexpMatchReaderBool, regexpMatchReaderErr)

	regexpMatchStringBool, regexpMatchStringErr := regexp.MatchString("foo.?", sample)
	fmt.Println(regexpMatchStringBool, regexpMatchStringErr)

	regexpMatchBool, regexpMatchErr := regexp.Match("foo.?", []byte(sample))
	fmt.Println(regexpMatchBool, regexpMatchErr)

	// ReplaceAll / ReplaceAllString replace every match with repl and return
	// the resulting copy. "$name" references inside repl are expanded.
	// The []byte result is converted to a string so it prints as text rather
	// than as a list of byte values.
	regexpReplaceAll := regexpCompile.ReplaceAll([]byte(sample), []byte("ddd"))
	fmt.Println(string(regexpReplaceAll))
	regexpReplaceAllString := regexpCompile.ReplaceAllString(sample, "ddd")
	fmt.Println(regexpReplaceAllString)

	// The Literal variants insert repl verbatim: "$ddd" is not treated as a
	// group reference.
	regexpReplaceAllLiteral := regexpMustCompile.ReplaceAllLiteral([]byte(sample), []byte("$ddd"))
	fmt.Println(string(regexpReplaceAllLiteral))
	regexpReplaceAllLiteralString := regexpMustCompile.ReplaceAllLiteralString(sample, "$ddd")
	fmt.Println(regexpReplaceAllLiteralString)

	// The Func variants call the function with each match and substitute its
	// return value directly, without any "$" expansion.
	regexpReplaceAllFunc := regexpCompile.ReplaceAllFunc([]byte(sample), func(b []byte) []byte { return []byte("World!") })
	fmt.Println(string(regexpReplaceAllFunc))
	regexpReplaceAllStringFunc := regexpCompile.ReplaceAllStringFunc(sample, func(s string) string { return "World!" })
	fmt.Println(regexpReplaceAllStringFunc)

	// QuoteMeta escapes all regular expression metacharacters
	// (\.+*?()|[]{}^$) so the result matches the input text literally;
	// e.g. QuoteMeta(`[foo]`) returns `\[foo\]`.
	regexpQuoteMeta := regexp.QuoteMeta(`Escaping symbols like: .+*?()|[]{}^$`)
	fmt.Println(regexpQuoteMeta)

	// Find returns the text of the first match in a byte slice.
	regexpFind := regexpCompile.Find([]byte(sample))
	fmt.Println(string(regexpFind))

	// FindIndex returns the [start, end) location of the first match.
	regexpFindIndex := regexpCompile.FindIndex([]byte(sample))
	fmt.Println(regexpFindIndex)

	// FindString returns the text of the first match in a string.
	regexpFindString := regexpCompile.FindString(sample)
	fmt.Println(regexpFindString)

	// FindStringIndex returns the [start, end) location of the first match.
	regexpFindStringIndex := regexpCompile.FindStringIndex(sample)
	fmt.Println(regexpFindStringIndex)

	// FindReaderIndex does the same for text read from an io.RuneReader.
	regexpFindReaderIndex := regexpCompile.FindReaderIndex(strings.NewReader(sample))
	fmt.Println(regexpFindReaderIndex)

	// FindReaderSubmatchIndex additionally reports the locations of the
	// capturing groups.
	regexpFindReaderSubmatchIndex := regexpCompile.FindReaderSubmatchIndex(strings.NewReader(sample))
	fmt.Println(regexpFindReaderSubmatchIndex)

	// FindSubmatch returns the first match followed by the text of each
	// capturing group. The result is nil when nothing matches, so guard
	// before indexing.
	if regexpFindSubMatch := regexpCompile.FindSubmatch([]byte(sample)); regexpFindSubMatch != nil {
		fmt.Println(string(regexpFindSubMatch[0]))
	}

	// FindSubmatchIndex returns the corresponding index pairs.
	regexpFindSubMatchIndex := regexpCompile.FindSubmatchIndex([]byte(sample))
	fmt.Println(regexpFindSubMatchIndex)

	// FindStringSubmatch is the string version of FindSubmatch; it is also
	// nil when nothing matches.
	if regexpFindStringSubMatch := regexpCompile.FindStringSubmatch(sample); regexpFindStringSubMatch != nil {
		fmt.Println(regexpFindStringSubMatch[0])
	}

	// FindStringSubmatchIndex is the string version of FindSubmatchIndex.
	regexpFindStringSubMatchIndex := regexpCompile.FindStringSubmatchIndex(sample)
	fmt.Println(regexpFindStringSubMatchIndex)

	// The FindAll family returns up to n successive matches; n < 0 means all.
	// FindAll: match texts from a byte slice.
	regexpFindAlls := regexpCompile.FindAll([]byte(sample), -1)
	for _, regexpFindAll := range regexpFindAlls {
		fmt.Println(string(regexpFindAll))
	}

	// FindAllIndex: match locations from a byte slice.
	regexpFindAllIndexs := regexpCompile.FindAllIndex([]byte(sample), -1)
	for _, regexpFindAllIndex := range regexpFindAllIndexs {
		fmt.Println(regexpFindAllIndex)
	}

	// FindAllString: match texts from a string.
	regexpFindAllStrings := regexpCompile.FindAllString(sample, -1)
	fmt.Println(regexpFindAllStrings)

	// FindAllStringIndex: match locations from a string.
	regexpFindAllStringIndexs := regexpCompile.FindAllStringIndex(sample, -1)
	for _, regexpFindAllStringIndex := range regexpFindAllStringIndexs {
		fmt.Println(regexpFindAllStringIndex)
	}

	// FindAllSubmatch: match and group texts from a byte slice.
	regexpFindAllSubmatchss := regexpCompile.FindAllSubmatch([]byte(sample), -1)
	for _, regexpFindAllSubmatchs := range regexpFindAllSubmatchss {
		for _, regexpFindAllSubmatch := range regexpFindAllSubmatchs {
			fmt.Println(string(regexpFindAllSubmatch))
		}
	}

	// FindAllSubmatchIndex: match and group locations from a byte slice.
	regexpFindAllSubmatchIndexs := regexpCompile.FindAllSubmatchIndex([]byte(sample), -1)
	for _, regexpFindAllSubmatchIndex := range regexpFindAllSubmatchIndexs {
		fmt.Println(regexpFindAllSubmatchIndex)
	}

	// FindAllStringSubmatch: match and group texts from a string.
	regexpFindAllStringSubmatchs := regexpCompile.FindAllStringSubmatch(sample, -1)
	for _, regexpFindAllStringSubmatch := range regexpFindAllStringSubmatchs {
		fmt.Println(regexpFindAllStringSubmatch)
	}

	// FindAllStringSubmatchIndex: match and group locations from a string.
	regexpFindAllStringSubmatchIndexs := regexpCompile.FindAllStringSubmatchIndex(sample, -1)
	for _, regexpFindAllStringSubmatchIndex := range regexpFindAllStringSubmatchIndexs {
		fmt.Println(regexpFindAllStringSubmatchIndex)
	}

	// Split cuts the string around each match into at most n pieces;
	// n < 0 returns every piece.
	regexpSplit := regexpCompile.Split("lksjdfoodklklklllsklakfoolllllfooksafkjslasfjk", -1)
	fmt.Println(regexpSplit)

	content := `
	# comment line
	option1: value1
	option2: value2
	# another comment line
	option3: value3`
	pattern := regexp.MustCompile(`(?m)(?P<key>\w+):\s+(?P<value>\w+)$`)
	template := "$key=$value\n"
	var result []byte
	var resultStr []byte
	for _, s := range pattern.FindAllSubmatchIndex([]byte(content), -1) {
		// Expand appends template to dst after replacing group references
		// such as $1, ${1}, $name or ${name} with the text they captured.
		// The match argument is an index slice as returned by
		// FindSubmatchIndex and is used to cut that text out of src.
		result = pattern.Expand(result, []byte(template), []byte(content), s)
	}
	fmt.Println(string(result))
	for _, s := range pattern.FindAllStringSubmatchIndex(content, -1) {
		// ExpandString is Expand with string template and source.
		resultStr = pattern.ExpandString(resultStr, template, content, s)
	}
	fmt.Println(string(resultStr))

	// Longest makes future searches prefer the leftmost-longest match
	// instead of the leftmost-first one. It modifies the Regexp in place.
	regexpCompile.Longest()
}

// regexpSyntaxMain walks through the API of the regexp/syntax package and
// prints the result of every call to standard output.
//
// The function returns early if parsing or compiling fails, so the later
// calls never run against a nil *syntax.Regexp or *syntax.Prog.
func regexpSyntaxMain() {
	// The entire string, including the braces, backticks and comma, is
	// handed to syntax.Parse as a single pattern.
	const pattern = "{`aaafooddddlkskffoollll`, `foo(?P<first>.?)`}"

	// ErrorCode values describe why a pattern failed to parse; String
	// returns the human-readable description.
	fmt.Println(syntax.ErrInvalidUTF8.String())

	// Parse turns the pattern into a syntax tree, using Perl-compatible flags.
	syntaxRegexp, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		// The error describes the parse failure and the offending expression.
		fmt.Println(err.Error())
		return
	}

	// A second, independent parse of the same pattern, used for Equal below.
	syntaxRegexpTow, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		fmt.Println(err.Error())
		return
	}

	// Simplify returns an equivalent tree with counted repetitions and other
	// complex constructs rewritten into simpler operators.
	simplified := syntaxRegexp.Simplify()
	fmt.Println(simplified)

	// CapNames lists the names of the capturing groups; index 0 and unnamed
	// groups are empty strings.
	syntaxCapName := syntaxRegexp.CapNames()
	fmt.Println(syntaxCapName)

	// Equal reports whether the two trees have identical structure.
	syntaxEqual := syntaxRegexp.Equal(syntaxRegexpTow)
	fmt.Println(syntaxEqual)

	// String renders the syntax tree back into pattern text.
	syntaxString := syntaxRegexp.String()
	fmt.Println(syntaxString)

	// MaxCap walks the tree and returns the highest capture group index.
	syntaxMaxCap := syntaxRegexp.MaxCap()
	fmt.Println(syntaxMaxCap)

	// Compile turns the tree into an executable program. Its documentation
	// asks for a tree that has already been simplified, so the result of
	// Simplify is passed rather than the raw parse tree.
	syntaxComplie, err := syntax.Compile(simplified)
	if err != nil {
		fmt.Println(err.Error())
		return
	}

	// String dumps the compiled program, one instruction per line.
	fmt.Println(syntaxComplie.String())

	// Prefix returns the literal string every match must start with, and
	// whether that literal is the entire match.
	fmt.Println(syntaxComplie.Prefix())

	// StartCond returns the leading empty-width conditions that must hold
	// for any match.
	fmt.Println(syntaxComplie.StartCond())
}