go库函数之-unicode-使用示例

最新推荐文章于 2023-11-11 23:24:26 发布
liao__ran
最新推荐文章于 2023-11-11 23:24:26 发布
阅读量1.1k
点赞数 2
分类专栏： go源码库 go 文章标签： golang
本文链接：https://blog.csdn.net/liao__ran/article/details/114667274
版权
go 同时被 2 个专栏收录
26 篇文章 1 订阅
订阅专栏
go源码库
7 篇文章 1 订阅
订阅专栏
//###############################################################
//D:\go\go\go库源码\源码库测试文件集合\unicode-example_test.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package unicode_test

import (
   "fmt"
   "unicode"
)

// Functions starting with "Is" can be used to inspect which table of range a
// rune belongs to. Note that runes may fit into more than one range.
func Example_is() {

   // constant with mixed type runes
   const mixed = "\b5Ὂg̀9! ℃ᾭG"
   for _, c := range mixed {
      fmt.Printf("For %q:\n", c)
      if unicode.IsControl(c) {  // 判断一个字符是否是控制字符，主要是策略C的字符和一些其他的字符如代理字符
         fmt.Println("\tis control rune")
      }
      if unicode.IsDigit(c) {    // 判断一个r字符是否是十进制数字字符
         fmt.Println("\tis digit rune")
      }
      if unicode.IsGraphic(c) {  // 判断一个字符是否是unicode图形。包括字母、标记、数字、符号、标点、空白，参见L、M、N、P、S、Zs
         fmt.Println("\tis graphic rune")
      }
      if unicode.IsLetter(c) {   // 判断一个字符是否是字母
         fmt.Println("\tis letter rune")
      }
      if unicode.IsLower(c) {    // 判断字符是否是小写字母
         fmt.Println("\tis lower case rune")
      }
      if unicode.IsMark(c) { // 判断一个字符是否是标记字符
         fmt.Println("\tis mark rune")
      }
      if unicode.IsNumber(c) {   // 判断一个字符是否是数字字符
         fmt.Println("\tis number rune")
      }
      if unicode.IsPrint(c) {       // 判断一个字符是否是go的可打印字符   // 本函数基本和IsGraphic一致，只是ASCII空白字符U+0020会返回假
         fmt.Println("\tis printable rune")
      }
      if !unicode.IsPrint(c) {
         fmt.Println("\tis not printable rune")
      }
      if unicode.IsPunct(c) {    // 判断一个字符是否是unicode标点字符
         fmt.Println("\tis punct rune")
      }
      if unicode.IsSpace(c) {    // 判断一个字符是否是空白字符   // 在Latin-1字符空间中，空白字符为：'\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).其它的空白字符请参见策略Z和属性Pattern_White_Space
         fmt.Println("\tis space rune")
      }
      if unicode.IsSymbol(c) {   // 判断一个字符是否是unicode符号字符
         fmt.Println("\tis symbol rune")
      }
      if unicode.IsTitle(c) {    // 判断字符是否是标题字母
         fmt.Println("\tis title case rune")
      }
      if unicode.IsUpper(c) {    // 判断字符是否是大写字
         fmt.Println("\tis upper case rune")
      }
   }

   // Output:
   // For '\b':
   //     is control rune
   //     is not printable rune
   // For '5':
   //     is digit rune
   //     is graphic rune
   //     is number rune
   //     is printable rune
   // For 'Ὂ':
   //     is graphic rune
   //     is letter rune
   //     is printable rune
   //     is upper case rune
   // For 'g':
   //     is graphic rune
   //     is letter rune
   //     is lower case rune
   //     is printable rune
   // For '̀':
   //     is graphic rune
   //     is mark rune
   //     is printable rune
   // For '9':
   //     is digit rune
   //     is graphic rune
   //     is number rune
   //     is printable rune
   // For '!':
   //     is graphic rune
   //     is printable rune
   //     is punct rune
   // For ' ':
   //     is graphic rune
   //     is printable rune
   //     is space rune
   // For '℃':
   //     is graphic rune
   //     is printable rune
   //     is symbol rune
   // For 'ᾭ':
   //     is graphic rune
   //     is letter rune
   //     is printable rune
   //     is title case rune
   // For 'G':
   //     is graphic rune
   //     is letter rune
   //     is printable rune
   //     is upper case rune
}

func ExampleSimpleFold() {
   // 迭代在unicode标准字符映射中互相对应的unicode码值
   // 在与r对应的码值中（包括r自身），会返回最小的那个大于r的字符（如果有）；否则返回映射中最小的字符
   fmt.Printf("%#U\n", unicode.SimpleFold('A'))      // 'a'
   fmt.Printf("%#U\n", unicode.SimpleFold('a'))      // 'A'
   fmt.Printf("%#U\n", unicode.SimpleFold('K'))      // 'k'
   fmt.Printf("%#U\n", unicode.SimpleFold('k'))      // '\u212A' (Kelvin symbol, K)
   fmt.Printf("%#U\n", unicode.SimpleFold('\u212A')) // 'K'
   fmt.Printf("%#U\n", unicode.SimpleFold('1'))      // '1'

   // Output:
   // U+0061 'a'
   // U+0041 'A'
   // U+006B 'k'
   // U+212A 'K'
   // U+004B 'K'
   // U+0031 '1'
}

func ExampleTo() {
   const lcG = 'g'
   fmt.Printf("%#U\n", unicode.To(unicode.UpperCase, lcG))    //转大写
   fmt.Printf("%#U\n", unicode.To(unicode.LowerCase, lcG))    //转小写
   fmt.Printf("%#U\n", unicode.To(unicode.TitleCase, lcG))    //转标题

   const ucG = 'G'
   fmt.Printf("%#U\n", unicode.To(unicode.UpperCase, ucG))    //转大写
   fmt.Printf("%#U\n", unicode.To(unicode.LowerCase, ucG))    //转小写
   fmt.Printf("%#U\n", unicode.To(unicode.TitleCase, ucG))    //转标题

   // Output:
   // U+0047 'G'
   // U+0067 'g'
   // U+0047 'G'
   // U+0047 'G'
   // U+0067 'g'
   // U+0047 'G'
}

func ExampleToLower() {    //转小写
   const ucG = 'G'
   fmt.Printf("%#U\n", unicode.ToLower(ucG))

   // Output:
   // U+0067 'g'
}
func ExampleToTitle() {    //转标题
   const ucG = 'g'
   fmt.Printf("%#U\n", unicode.ToTitle(ucG))

   // Output:
   // U+0047 'G'
}

func ExampleToUpper() {    //转大写
   const ucG = 'g'
   fmt.Printf("%#U\n", unicode.ToUpper(ucG))

   // Output:
   // U+0047 'G'
}

func ExampleSpecialCase() {
   t := unicode.TurkishCase

   const lci = 'i'
   fmt.Printf("%#U\n", t.ToLower(lci))
   fmt.Printf("%#U\n", t.ToTitle(lci))
   fmt.Printf("%#U\n", t.ToUpper(lci))

   const uci = 'İ'
   fmt.Printf("%#U\n", t.ToLower(uci))
   fmt.Printf("%#U\n", t.ToTitle(uci))
   fmt.Printf("%#U\n", t.ToUpper(uci))

   // Output:
   // U+0069 'i'
   // U+0130 'İ'
   // U+0130 'İ'
   // U+0069 'i'
   // U+0130 'İ'
   // U+0130 'İ'
}

#####################################################################
D:\go\go\go库源码\源码库测试文件集合\unicode-utf8-example_test.go

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package utf8_test

import (
   "fmt"
   "unicode/utf8"
)

func ExampleDecodeLastRune() {
   // 解码p中最后一个utf-8编码序列，返回该码值和编码序列的长度
   utf8.DecodeLastRune(b)
   b := []byte("Hello, 世界")

   for len(b) > 0 {
      r, size := utf8.DecodeLastRune(b)
      fmt.Printf("%c %v\n", r, size)

      b = b[:len(b)-size]
   }
   // Output:
   // 界 3
   // 世 3
   //   1
   // , 1
   // o 1
   // l 1
   // l 1
   // e 1
   // H 1
}

func ExampleDecodeLastRuneInString() {
   // 类似DecodeLastRune但输入参数是字符串
   utf8.DecodeLastRuneInString(string(b))
   str := "Hello, 世界"

   for len(str) > 0 {
      r, size := utf8.DecodeLastRuneInString(str)
      fmt.Printf("%c %v\n", r, size)

      str = str[:len(str)-size]
   }
   // Output:
   // 界 3
   // 世 3
   //   1
   // , 1
   // o 1
   // l 1
   // l 1
   // e 1
   // H 1

}

func ExampleDecodeRune() {
   // 解码p开始位置的第一个utf-8编码的码值，返回该码值和编码的字节数
   // 如果编码不合法，会返回(RuneError, 1)。该返回值在正确的utf-8编码情况下是不可能返回的
   utf8.DecodeRune(b)
   b := []byte("Hello, 世界")

   for len(b) > 0 {
      r, size := utf8.DecodeRune(b)
      fmt.Printf("%c %v\n", r, size)

      b = b[size:]
   }
   // Output:
   // H 1
   // e 1
   // l 1
   // l 1
   // o 1
   // , 1
   //   1
   // 世 3
   // 界 3
}

func ExampleDecodeRuneInString() {
   // 类似DecodeRune但输入参数是字符串
   utf8.DecodeRuneInString(string(b))
   str := "Hello, 世界"

   for len(str) > 0 {
      r, size := utf8.DecodeRuneInString(str)
      fmt.Printf("%c %v\n", r, size)

      str = str[size:]
   }
   // Output:
   // H 1
   // e 1
   // l 1
   // l 1
   // o 1
   // , 1
   //   1
   // 世 3
   // 界 3
}

func ExampleEncodeRune() {
   // 将r的utf-8编码序列写入p（p必须有足够的长度），并返回写入的字节数
   utf8.EncodeRune(b, 'H')
   r := '世'
   buf := make([]byte, 3)

   n := utf8.EncodeRune(buf, r)

   fmt.Println(buf)
   fmt.Println(n)
   // Output:
   // [228 184 150]
   // 3
}

func ExampleFullRune() {
   // 判断切片p是否以一个码值的完整utf-8编码开始
   // 不合法的编码因为会被转换为宽度1的错误码值而被视为完整的
   // 如中文字符占3位byte，一位byte判断为false，完整的3位为true
   utf8.FullRune(b)
   buf := []byte{228, 184, 150} // 世
   fmt.Println(utf8.FullRune(buf))
   fmt.Println(utf8.FullRune(buf[:2]))
   // Output:
   // true
   // false
}

func ExampleFullRuneInString() {
   // 类似FullRune但输入参数是字符串
   utf8.FullRuneInString(string(b))
   str := "世"
   fmt.Println(utf8.FullRuneInString(str))
   fmt.Println(utf8.FullRuneInString(str[:2]))
   // Output:
   // true
   // false
}

func ExampleRuneCount() {
   // 返回p中的utf-8编码的码值的个数。错误或者不完整的编码会被视为宽度1字节的单个码值
   utf8.RuneCount(b)
   buf := []byte("Hello, 世界")
   fmt.Println("bytes =", len(buf))
   fmt.Println("runes =", utf8.RuneCount(buf))
   // Output:
   // bytes = 13
   // runes = 9
}

func ExampleRuneCountInString() {
   // 类似RuneCount但输入参数是一个字符串
   utf8.RuneCountInString(string(b))
   str := "Hello, 世界"
   fmt.Println("bytes =", len(str))
   fmt.Println("runes =", utf8.RuneCountInString(str))
   // Output:
   // bytes = 13
   // runes = 9
}

func ExampleRuneLen() {
   // 返回r编码后的字节数。如果r不是一个合法的可编码为utf-8序列的值，会返回-1
   utf8.RuneLen('世')
   fmt.Println(utf8.RuneLen('a'))
   fmt.Println(utf8.RuneLen('界'))
   // Output:
   // 1
   // 3
}

func ExampleRuneStart() {
   // 判断字节b是否可以作为某个rune编码后的第一个字节。第二个即之后的字节总是将左端两个字位设为10
   utf8.RuneStart('世')
   buf := []byte("a界")
   fmt.Println(utf8.RuneStart(buf[0]))
   fmt.Println(utf8.RuneStart(buf[1]))
   fmt.Println(utf8.RuneStart(buf[2]))
   // Output:
   // true
   // true
   // false
}

func ExampleValid() {
   // 判断切片p是否包含完整且合法的utf-8编码序列
   utf8.Valid(b)
   valid := []byte("Hello, 世界")
   invalid := []byte{0xff, 0xfe, 0xfd}

   fmt.Println(utf8.Valid(valid))
   fmt.Println(utf8.Valid(invalid))
   // Output:
   // true
   // false
}

func ExampleValidRune() {
   // 判断r是否可以编码为合法的utf-8序列
   utf8.ValidRune('H')
   valid := 'a'
   invalid := rune(0xfffffff)

   fmt.Println(utf8.ValidRune(valid))
   fmt.Println(utf8.ValidRune(invalid))
   // Output:
   // true
   // false
}

func ExampleValidString() {
   // 判断s是否包含完整且合法的utf-8编码序列
   utf8.ValidString(string(b))
   valid := "Hello, 世界"
   invalid := string([]byte{0xff, 0xfe, 0xfd})

   fmt.Println(utf8.ValidString(valid))
   fmt.Println(utf8.ValidString(invalid))
   // Output:
   // true
   // false
}
liao__ran
关注
2
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
go库函数之-unicode-使用示例

###############################################################D:\go\go\go库源码\源码库测试文件集合\unicode-example_test.go// Copyright 2015 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in .
复制链接

扫一扫