《ES6标准入门（第3版）》学习笔记11：chapter_4 字符串的扩展（一）

最新推荐文章于 2019-10-06 23:45:52 发布

壹小楷

最新推荐文章于 2019-10-06 23:45:52 发布

阅读量70

点赞数

分类专栏： ES6标准入门（第3版）学习笔记　文章标签： 字符串的扩展 ES6 学习总结

本文链接：https://blog.csdn.net/weixin_44582681/article/details/101321350

版权

ES6标准入门（第3版）学习笔记专栏收录该内容

29 篇文章 0 订阅

订阅专栏

这是第11篇笔记！
1 字符的Unicode表示法

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    <title>字符串的Unicode表示法</title>
</head>
<body>
    

    <script>
    // JavaScript lets a character be written as "\uxxxx", where xxxx is its Unicode code point.
    const a = '\u0061';

    // [Note] That form only covers code points from "\u0000" to "\uFFFF". Anything
    // above that range has to be written as two double-byte units.
    const b = '\uD842\uDFB7';

    // Putting a value above 0xFFFF straight after \u does not work: JavaScript reads
    // "\u20BB7" as "\u20BB" followed by the character "7".
    const c = '\u20BB7';

    // ES6 improvement: as long as the code point is wrapped in braces, the character
    // is read correctly.
    const d = '\u{20BB7}';           // 𠮷
    const e = '\u{41}\u{42}\u{43}';  // 'ABC'

    // Print the samples in the order they were declared.
    for (const sample of [a, b, c, d, e]) {
        console.log(sample);
    }

    // The brace notation is equivalent to the four-byte UTF-16 encoding.
    console.log('\u{1F680}' === '\uD83D\uDE80');  // true

    // 【总结】6种方法表示一个字符
    console.log('\z' === 'z');
    console.log('\172' === 'z');
    console.log('\x7A' === 'z');
    console.log('\u007A' === 'z');
    console.log('\u{7A}' === 'z');
    



    
    
    </script>
</body>
</html>

2 codePointAt()

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    <title>codePointAt()</title>
</head>
<body>
    

    <script>
    // Internally JavaScript stores characters as UTF-16, each character fixed at 2 bytes.
    // A character that needs 4 bytes of storage (code point above 0xFFFF) is treated
    // by JavaScript as two characters.
    //
    // The sample character throughout is 𠮷 (U+20BB7). It is written as the escape
    // '\u{20BB7}' so the demo keeps working even if the file is saved or copied with
    // an encoding that cannot represent the character (it had degraded to '?').

    // var s = '\u{20BB7}';
    // console.log(s.length);         // 2
    // console.log(s.charAt(0));      // '\uD842' (lone high surrogate)
    // console.log(s.charAt(1));      // '\uDFB7' (lone low surrogate)
    // console.log(s.charCodeAt(0));  // 55362
    // console.log(s.charCodeAt(1));  // 57271
    // Neither charAt() nor charCodeAt() handles a character stored in 4 bytes correctly.

    // (1) ES6 codePointAt() handles 4-byte characters correctly and returns a code point.
    // var s1 = '\u{20BB7}a';
    // console.log(s1.codePointAt(0));  // 134071
    // console.log(s1.codePointAt(1));  // 57271
    // console.log(s1.codePointAt(2));  // 97

    // [In short] 1. codePointAt() returns the correct code point for a 32-bit (4-byte)
    //               UTF-16 character.
    //            2. For ordinary characters stored in 2 bytes it returns the same
    //               result as charCodeAt().

    // (2) codePointAt() returns the code point in decimal; use toString(16) for hex.
    const s1 = '\u{20BB7}a';
    console.log(s1.codePointAt(0).toString(16));  // 20bb7
    // 'a' is the second character of s1, yet index 2 (not 1) must be passed, because
    // the index counts UTF-16 code units and the first character occupies two of them.
    console.log(s1.codePointAt(2).toString(16));  // 61

    // [Fix] Use a for...of loop, which recognizes 32-bit UTF-16 characters correctly.
    for (const ch of s1) {
        console.log(ch.codePointAt(0).toString(16));  // 20bb7, then 61
    }

    // (3) codePointAt() is the simplest way to test whether a character is made up of
    // 2 bytes or 4 bytes.
    // Returns true when the code point at index 0 of `c` is above 0xFFFF.
    function is32Bit(c) {
        const firstCodePoint = c.codePointAt(0);
        const twoByteMax = 0xFFFF;
        return firstCodePoint > twoByteMax;
    }

    // The first sample is 𠮷 (U+20BB7), written as an escape so it cannot degrade to
    // '?' when the file is re-encoded.
    console.log(is32Bit('\u{20BB7}'));  // true
    console.log(is32Bit('a'));          // false
    
    </script>
</body>
</html>

3 String.fromCodePoint()

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    <title>String.fromCodePoint()</title>
</head>
<body>
    


    <script>
    // ES5 String.fromCharCode() returns the character for a code point, but it cannot
    // recognize 32-bit UTF-16 characters.

    // (1) ES6 String.fromCodePoint() also returns the character for a code point, and
    // it does recognize 32-bit UTF-16 characters.
    const han = String.fromCodePoint(0x20BB7);
    console.log(han);  // 𠮷

    // (2) When several arguments are given, they are joined into one string.
    const joined = String.fromCodePoint(0x78, 0x1f680, 0x79);
    console.log(joined === 'x\uD83D\uDE80y');

    // [Note]
    // fromCodePoint() --> defined on the String object
    // codePointAt()   --> defined on string instances
    
    
    
    
    </script>
</body>
</html>

4 字符串的遍历器接口

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    <title>字符串的遍历器接口</title>
</head>
<body>
    
    <script>
    // (1) ES6 adds the iterator interface to strings (see chapter 15), so a string can
    // be traversed with a for...of loop.
    for (const ch of 'foo32d') {
        console.log(ch);
    }

    // (2) The key advantage of the iterator: it recognizes code points above 0xFFFF,
    // which a traditional index-based for loop cannot.
    const text = String.fromCodePoint(0x20BB7);

    // Cannot: the index loop walks text.length positions and prints text[idx] for each.
    const unitCount = text.length;
    for (let idx = 0; idx < unitCount; idx += 1) {
        console.log(text[idx]);
    }

    // Can: for...of yields the character as a whole.
    for (const char of text) {
        console.log(char);
    }
    
    
    
    </script>
</body>
</html>

5 at()

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    <title>at()</title>
</head>
<body>


    <script>
    // ES5 charAt(): returns the character at the given position; it cannot recognize
    // characters whose code point is above 0xFFFF.
    // The sample is 𠮷 (U+20BB7), written as an escape so it cannot degrade to '?'.
    const han = '\u{20BB7}';
    console.log('abc'.charAt(0));  // 'a'
    console.log(han.charAt(0));    // '\uD842' (lone high surrogate)

    // at(): the book describes a proposed at() that would recognize characters above
    // 0xFFFF. The String.prototype.at() that was actually standardized (ES2022)
    // indexes UTF-16 code units, so it returns the same lone surrogate as charAt().
    console.log(han.at(0));  // '\uD842'

    // Code-point-aware way to read the first character with standard ES6 methods.
    const firstChar = String.fromCodePoint(han.codePointAt(0));
    console.log(firstChar);  // 𠮷
    
    
    
    
    
    </script>
</body>
</html>

6 normalize()

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    <title>normalize()</title>
</head>
<body>
    
    <script>
    // Many European languages have tone marks and accent marks, and JavaScript does not
    // recognize the two ways of writing them as the same thing.
    // Unicode offers two representations: a single character that already carries the
    // accent, or a base character followed by a combining mark.
    // normalize() unifies the different representations of a character into one form
    // ("Unicode normalization").
    const precomposed = '\u01D1';     // one accented character
    const combining = '\u004F\u030C'; // base character + combining mark
    const result = precomposed.normalize() === combining.normalize();
    console.log(result);  // true

    // normalize() accepts one argument that selects the normalization form:
    // a. NFC  (default) - "canonical equivalence, composition": returns the composed
    //    character for several simple characters. "Canonical equivalence" means
    //    equivalent both visually and semantically.
    // b. NFD  - "canonical equivalence, decomposition": returns the simple characters
    //    that the composed character decomposes into.
    // c. NFKC - "compatibility equivalence, composition": returns the composed
    //    character. "Compatibility equivalence" means semantically equivalent but
    //    not visually equivalent.
    // d. NFKD - "compatibility equivalence, decomposition": returns the simple
    //    characters that the composed character decomposes into.
    for (const form of ['NFC', 'NFD', 'NFKC', 'NFKD']) {
        console.log(combining.normalize(form).length);  // 1, 2, 1, 2
    }

    // normalize() currently cannot recognize compositions of three or more characters
    // (use a regular expression instead).
    
    
    
    
    </script>
</body>
</html>

7 includes() startsWith() endsWith()

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    <title>includes() startsWith() endsWith()</title>
</head>
<body>
    


    <script>
    // Previously JavaScript only had indexOf() to determine whether one string
    // contains another. ES6 adds three more methods:
    // (1) includes()   - returns a boolean: whether the argument string was found
    // (2) startsWith() - returns a boolean: whether the argument string is at the start
    //                    of the source string
    // (3) endsWith()   - returns a boolean: whether the argument string is at the end
    //                    of the source string
    const s = 'hello world!';

    const containsHello = s.includes('hello');
    const startsWithH = s.startsWith('h');
    const endsWithBang = s.endsWith('!');
    console.log(containsHello);  // true
    console.log(startsWithH);    // true
    console.log(endsWithBang);   // true

    // All three methods take a second argument giving the search position.
    const containsHelloFrom6 = s.includes('hello', 6);
    const startsWithHFrom0 = s.startsWith('h', 0);
    const first4EndWithHello = s.endsWith('hello', 4);
    console.log(containsHelloFrom6);  // false
    console.log(startsWithHFrom0);    // true
    console.log(first4EndWithHello);  // false
    // When the second argument n is used:
    // endsWith()   --> applies to the first n characters
    // startsWith() --> applies to the characters from position n to the end of the string
    
    
    
    
    
    
    </script>
</body>
</html>

让学习“上瘾”，成为更好的自己！！！

壹小楷

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
《ES6标准入门（第3版）》学习笔记11：chapter_4 字符串的扩展（一）

这是第11篇笔记！1 字符的Unicode表示法<!DOCTYPE html><html lang="en"><head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> ...
复制链接

扫一扫