go语言正则表达式regexp

最新推荐文章于 2024-08-21 15:50:40 发布

斯猿解陈.

最新推荐文章于 2024-08-21 15:50:40 发布

阅读量2.3k

点赞数

文章标签： go语言正则表达式函数

本文链接：https://blog.csdn.net/sqn614/article/details/70172798

版权

 
 regexp包里的正则表达式函数代码示例 

 
 package main 

 
 import ( 

 
     "fmt" 

 
     "io/ioutil" 

 
     "net/http" 

 
     "os" 

 
     "regexp" 

 
     "strings" 

)

 
 /*func Match(pattern string, b []byte) (matched bool, err error)
 func MatchReader(pattern string, r io.RuneReader) (matched bool, err error)
 func MatchString(pattern string, s string) (matched bool, err error)*/

 
 func IsIp(ip string) bool { 

 
     if m, _ := regexp.MatchString("^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}.[0-9]{1,3}$", ip); !m { 

 
         return false 

}

 
     return true 

}

 
 func IsDigital(str string) string { 

 
     if m, _ := regexp.MatchString("^[0-9]+$", str); !m { 

 
         return "不是数字" 

}

 
     return "是数字" 

}

 
 func main1() { 

 
     fmt.Println(IsIp("192.168.172.88")) 

 
     fmt.Println(IsIp("192,167.12.11")) 

 
     fmt.Println(IsDigital("192,167.12.11")) 

 
     fmt.Println(IsDigital("192")) 

}

 
 /*以爬虫为例来说明如何使用正则来过滤或截取抓取到的数据*/ 

 
 func main2() { 

 
     resp, err := http.Get("http://www.baidu.com") 

 
     if err != nil { 

 
         fmt.Println("http ger error") 

 
         return 

}

 
     defer resp.Body.Close() 

 
     body, err := ioutil.ReadAll(resp.Body) 

 
     if err != nil { 

 
         fmt.Println("http read err") 

 
         return 

}

 
     src := string(body) 

 
     /*O_RDONLY int = syscall.O_RDONLY // open the file read-only. 

 
     O_WRONLY int = syscall.O_WRONLY // open the file write-only. 

 
     O_RDWR int = syscall.O_RDWR // open the file read-write. 

 
     O_APPEND int = syscall.O_APPEND // append data to the file when writing. 

 
     O_CREATE int = syscall.O_CREAT // create a new file if none exists. 

 
     O_EXCL int = syscall.O_EXCL // used with O_CREATE, file must not exist 

 
     O_SYNC int = syscall.O_SYNC // open for synchronous I/O. 

 
     O_TRUNC int = syscall.O_TRUNC // if possible, truncate file when opened.*/ 

 
     file, _ := os.OpenFile("baidu.html", os.O_RDWR|os.O_CREATE, 0666) 

 
     defer file.Close() 

 
     file.WriteString(src) 

 
     //使用复杂的正则首先是Compile，它会解析正则表达式是否合法，如果正确，那么就会返回一个Regexp， 

 
     //然后就可以利用返回的Regexp在任意的字符串上面执行需要的操作 

 
     //将HTML标签全转换成小写 

 
     re, _ := regexp.Compile("\\<[\\S\\s]+?\\>") 

 
     src = re.ReplaceAllStringFunc(src, strings.ToLower) 

 
     //去除STYLE 

 
     re, _ = regexp.Compile("\\<style[\\S\\s]+?\\</style\\>") 

 
     src = re.ReplaceAllString(src, "") 

 
     //去除SCRIPT 

 
     re, _ = regexp.Compile("\\<script[\\S\\s]+?\\</script\\>") 

 
     src = re.ReplaceAllString(src, "") 

 
     //去除所有尖括号内的HTML代码，并换成换行符 

 
     re, _ = regexp.Compile("\\<[\\S\\s]+?\\>") 

 
     src = re.ReplaceAllString(src, "\n") 

 
     //去除连续的换行符 

 
     re, _ = regexp.Compile("\\s{2,}") 

 
     src = re.ReplaceAllString(src, "\n") 

 
     fmt.Println(strings.TrimSpace(src)) 

}

 
 func main() { 

 
     a := "I am learning Go language" 

 
     fmt.Println([]byte(a)) 

 
     re, _ := regexp.Compile("[a-z]{2,4}") 

 
     //查找符合正则的第一个 

 
     one := re.Find([]byte(a)) 

 
     fmt.Println("Find:", string(one)) 

 
     //查找符合正则的所有slice,n小于0表示返回全部符合的字符串，不然就是返回指定的长度 

 
     all := re.FindAll([]byte(a), -1) 

 
     for _, value := range all { 

 
         fmt.Println("FindAll", string(value)) 

}

 
     //查找符合条件的index位置,开始位置和结束位置 

 
     index := re.FindIndex([]byte(a)) 

 
     fmt.Println("FindIndex", index) 

 
     //查找符合条件的所有的index位置，n同上 

 
     allindex := re.FindAllIndex([]byte(a), -1) 

 
     fmt.Println("FindAllIndex", allindex) 

 
     re2, _ := regexp.Compile("am(.*)lang(.*)") 

 
     //查找Submatch,返回数组，第一个元素是匹配的全部元素，第二个元素是第一个()里面的，第三个是第二个()里面的 

 
     //下面的输出第一个元素是"am learning Go language" 

 
     //第二个元素是" learning Go "，注意包含空格的输出 

 
     //第三个元素是"uage" 

 
     submatch := re2.FindSubmatch([]byte(a)) 

 
     fmt.Println("FindSubmatch", submatch) 

 
     for _, v := range submatch { 

 
         fmt.Println(string(v)) 

}

 
     //定义和上面的FindIndex一样 

 
     submatchindex := re2.FindSubmatchIndex([]byte(a)) 

 
     fmt.Println(submatchindex) 

 
     //FindAllSubmatch,查找所有符合条件的子匹配 

 
     submatchall := re2.FindAllSubmatch([]byte(a), -1) 

 
     fmt.Println(submatchall) 

 
     //FindAllSubmatchIndex,查找所有字匹配的index 

 
     submatchallindex := re2.FindAllSubmatchIndex([]byte(a), -1) 

 
     fmt.Println(submatchallindex) 

 
     src1 := []byte(` 

 
 call hello alice 

 
 hello bob 

 
 call hello eve 

`)

 
     pat := regexp.MustCompile(`(?m)(call)\s+(?P<cmd>\w+)\s+(?P<arg>.+)\s*$`) 

 
     res := []byte{} 

 
     for _, s := range pat.FindAllSubmatchIndex(src1, -1) { 

 
         res = pat.Expand(res, []byte("$cmd('$arg')\n"), src1, s) 

}

 
     fmt.Println("expand ", string(res)) 

}

关注