Go多个正则表达式查找的区别

小龙在山东

已于 2023-06-20 16:03:32 修改

阅读量1k

点赞数

分类专栏： Go 文章标签：正则表达式 golang

于 2023-06-16 20:30:00 首次发布

本文链接：https://blog.csdn.net/lilongsy/article/details/131243919

版权

Go 专栏收录该内容

45 篇文章 1 订阅

订阅专栏

区别

Go的regexp包用正则表达式来查找、替换和处理字符串。其中查找方法有很多，它们的区别如下：

函数	参数类型	能否全部	能否子匹配	返回匹配字符串或索引
Find	字节数组	最左边	否	字节数组
FindAll	字节数组	全部	否	字节数组
FindIndex	字节数组	最左边	否	索引
FindAllIndex	字节数组	全部	否	索引
FindString	字符串	最左边	否	字符串
FindAllString	字符串	全部	否	字符串
FindAllStringIndex	字符串	全部	否	索引
FindAllSubmatch	字节数组	全部	是	字节数组
FindAllSubmatchIndex	字节数组	全部	是	索引
FindSubmatch	字节数组	最左边	是	字节数组
FindSubmatchIndex	字节数组	最左边	是	索引
FindStringSubmatch	字符串	最左边	是	字符串数组
FindStringSubmatchIndex	字符串	最左边	是	索引
FindAllStringSubmatch	字符串	全部	是	字符串数组
FindAllStringSubmatchIndex	字符串	全部	是	索引

字节类型匹配模板Expand

package main_test

import (
	"fmt"
	"regexp"
	"testing"
)

// TestExpand demonstrates Regexp.Expand on a byte slice: every "key: value"
// pair found in the input is rendered through a template that refers to the
// pattern's named groups, and the accumulated output is printed.
func TestExpand(t *testing.T) {
	// The template refers to the named capture groups of the pattern below and
	// turns "key: value" into "key=value".
	tmpl := []byte("$key=$value\n")

	// Multi-line mode: a word followed by a colon is captured as "key", and the
	// word that ends the line is captured as "value".
	re := regexp.MustCompile(`(?m)(?P<key>\w+):\s+(?P<value>\w+)$`)

	src := []byte(`
	# comment line
	option1: value1
	option2: value2

	# another comment line
	option3: value3
`)

	out := make([]byte, 0)
	matches := re.FindAllSubmatchIndex(src, -1)
	for i := 0; i < len(matches); i++ {
		// Apply the template to this pair; Expand returns the extended slice.
		out = re.Expand(out, tmpl, src, matches[i])
	}
	fmt.Println(string(out))
}

// option1=value1
// option2=value2
// option3=value3

这里用(?P<key>\w+)给分组命名为key，然后通过Expand把匹配到的key按照模板的样式追加到result，value同理。另外，Expand的最后一个参数必须是FindSubmatchIndex返回的索引，或者FindAllSubmatchIndex结果中的某一项。

字符串类型匹配模板Expand

// TestExpandString is the string counterpart of the Expand demo: each
// "key: value" pair in the text is rendered through a "$key=$value" template
// with Regexp.ExpandString, and the accumulated bytes are printed.
func TestExpandString(t *testing.T) {
	// Conversion template; $key and $value name the pattern's capture groups.
	const tmpl = "$key=$value\n"

	// Compile the pattern that captures the key and the value on each line.
	re := regexp.MustCompile(`(?m)(?P<key>\w+):\s+(?P<value>\w+)$`)

	text := `
	# comment line
	option1: value1
	option2: value2

	# another comment line
	option3: value3
`

	out := make([]byte, 0)
	pairs := re.FindAllStringSubmatchIndex(text, -1)
	for i := range pairs {
		// Apply the template to the i-th matched pair.
		out = re.ExpandString(out, tmpl, text, pairs[i])
	}
	fmt.Println(string(out))
}

查找左侧第一个和所有

// TestFind prints, quoted with %q, the leftmost match of `foo.?` in a byte
// slice.
func TestFind(t *testing.T) {
	input := []byte(`seafood fool`)
	pattern := regexp.MustCompile(`foo.?`)
	first := pattern.Find(input)
	fmt.Printf("%q\n", first)
}
// "food"


// TestFindAll prints, quoted with %q, every match of `foo.?` in a byte slice;
// the limit of -1 asks for all matches.
func TestFindAll(t *testing.T) {
	input := []byte(`seafood fool`)
	pattern := regexp.MustCompile(`foo.?`)
	every := pattern.FindAll(input, -1)
	fmt.Printf("%q\n", every)
}
// ["food" "fool"]

Find查找最左侧的第一个匹配。FindAll的第二个参数为-1时查找所有匹配；为1时只返回最左侧的一个匹配，内容与Find的结果相同，只是仍然包在一个切片里。
这两个方法都不会返回子匹配。

字符串类型查找左侧第一个字符串

// TestFindString prints the leftmost match of `foo.?` in two strings: one that
// contains a match and one that does not.
func TestFindString(t *testing.T) {
	pattern := regexp.MustCompile(`foo.?`)
	for _, input := range []string{"seafood fool", "meat"} {
		fmt.Printf("%q\n", pattern.FindString(input))
	}
}

// "food"
// ""

查找索引

// TestFindAllIndex prints the start/end index pairs of the matches of `o.` in
// "London", first limited to one match and then unlimited.
func TestFindAllIndex(t *testing.T) {
	pattern := regexp.MustCompile(`o.`)
	city := []byte("London")
	for _, limit := range []int{1, -1} {
		fmt.Println(pattern.FindAllIndex(city, limit))
	}
}
// [[1 3]]
// [[1 3] [4 6]]

FindAllIndex会返回每个匹配的位置（开始索引和结束索引），匹配到的内容就是content[开始:结束]。

字符串类型的查找所有

// TestFindAllString prints the matches of `a.` for several inputs, with and
// without a limit on the number of matches.
func TestFindAllString(t *testing.T) {
	pattern := regexp.MustCompile(`a.`)
	cases := []struct {
		text  string
		limit int
	}{
		{"paranormal", -1},
		{"paranormal", 2},
		{"graal", -1},
		{"none", -1},
	}
	for _, c := range cases {
		fmt.Println(pattern.FindAllString(c.text, c.limit))
	}
}
// [ar an al]
// [ar an]
// [aa]
// []

子匹配字符串查找所有

// TestFindAllStringSubmatch prints, for several inputs, every match of
// `a(x*)b` together with the text captured by its group.
func TestFindAllStringSubmatch(t *testing.T) {
	pattern := regexp.MustCompile(`a(x*)b`)
	inputs := []string{"-ab-", "-axxb-", "-ab-axb-", "-axxb-ab-"}
	for _, in := range inputs {
		groups := pattern.FindAllStringSubmatch(in, -1)
		fmt.Printf("%q\n", groups)
	}
}

// [["ab" ""]]
// [["axxb" "xx"]]
// [["ab" ""] ["axb" "x"]]
// [["axxb" "xx"] ["ab" ""]]

子匹配字符串查找所有索引

// TestFindAllStringSubmatchIndex prints, for several inputs, the index pairs
// of every match of `a(x*)b` and of its capture group.
func TestFindAllStringSubmatchIndex(t *testing.T) {
	pattern := regexp.MustCompile(`a(x*)b`)
	// Character positions in the two longer inputs, for reading the output:
	//    01234567   012345678
	//    -ab-axb-   -axxb-ab-
	inputs := []string{"-ab-", "-axxb-", "-ab-axb-", "-axxb-ab-", "-foo-"}
	for _, in := range inputs {
		positions := pattern.FindAllStringSubmatchIndex(in, -1)
		fmt.Println(positions)
	}
}
// [[1 3 2 2]]
// [[1 5 2 4]]
// [[1 3 2 2] [4 7 5 6]]
// [[1 5 2 4] [6 8 7 7]]
// []

子匹配字节类型查找所有

// TestFindAllSubmatch prints, quoted with %q, every match of `foo(.?)` in a
// byte slice along with the text captured by its group.
func TestFindAllSubmatch(t *testing.T) {
	input := []byte(`seafood fool`)
	pattern := regexp.MustCompile(`foo(.?)`)
	groups := pattern.FindAllSubmatch(input, -1)
	fmt.Printf("%q\n", groups)
}

// [["food" "d"] ["fool" "l"]]

子匹配字节类型查询所有索引

// TestFindAllSubmatchIndex prints, for every "key: value" pair in a byte
// slice, the raw index slice followed by the text of the whole match, of the
// key group and of the value group.
func TestFindAllSubmatchIndex(t *testing.T) {
	// The pattern captures a "key: value" pair, naming the two groups.
	re := regexp.MustCompile(`(?m)(?P<key>\w+):\s+(?P<value>\w+)$`)
	data := []byte(`
	# comment line
	option1: value1
	option2: value2
`)
	for _, idx := range re.FindAllSubmatchIndex(data, -1) {
		fmt.Println(idx)
		// idx is read as three start/end pairs: pair 0 is the whole match,
		// pairs 1 and 2 are the key and value groups.
		for pair := 0; pair < 3; pair++ {
			lo, hi := idx[2*pair], idx[2*pair+1]
			fmt.Printf("%d: %s\n", pair+1, data[lo:hi])
		}
	}
}

// [18 33 18 25 27 33]
// 1: option1: value1
// 2: option1
// 3: value1
// [35 50 35 42 44 50]
// 1: option2: value2
// 2: option2
// 3: value2

字节类型查询左侧第一个匹配的索引

// TestFindIndex prints the start/end indexes of the leftmost "key: value" pair
// in a byte slice, followed by the matched text itself.
//
// FindIndex returns nil when nothing matches, so the result is checked before
// it is indexed; a missing match fails the test instead of panicking.
func TestFindIndex(t *testing.T) {
	content := []byte(`
	# comment line
	option1: value1
	option2: value2
`)
	// The pattern captures a "key: value" pair from the content.
	pattern := regexp.MustCompile(`(?m)(?P<key>\w+):\s+(?P<value>\w+)$`)

	loc := pattern.FindIndex(content)
	if loc == nil {
		t.Fatal("pattern did not match content")
	}
	fmt.Println(loc)
	// loc[0] and loc[1] delimit the whole match inside content.
	fmt.Println(string(content[loc[0]:loc[1]]))
}

// [18 33]
// option1: value1

字符串子匹配最左侧

// TestFindStringSubmatch prints, quoted with %q, the leftmost <title> element
// of a multi-line string together with the text captured between the tags.
// The (?s) flag lets "." match newlines, so the capture can span lines.
func TestFindStringSubmatch(t *testing.T) {
	html := `<title>
	标题
	</title>`
	pattern := regexp.MustCompile(`(?s)<title>(.*?)<\/title>`)
	parts := pattern.FindStringSubmatch(html)
	fmt.Printf("%q\n", parts)
}

// ["<title>\n\t标题\n\t</title>" "\n\t标题\n\t"]

参考

https://github.com/google/re2/wiki/Syntax

小龙在山东

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
Go多个正则表达式查找的区别

Find查找最左侧第一个。FindAll第二个参数是-1就是查找所有匹配，如果FindAll第二个参数是1，就只返回最左侧的一个匹配。用(?P<key>\w+)给分组命名为key，然后通过Expand将匹配到的key按照模板的样式输出给result，value同理，另外匹配必须要用FindAllSubmatchIndex的结果。FindAllIndex会返回匹配到的位置（开始和结束索引）。Find和FindAll这两个方法不会返回子匹配。
复制链接

扫一扫