package com.lingyejun.dating.chap11;

import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Console demo contrasting the ordinary ASCII space (byte 0x20) with the
 * NO-BREAK SPACE (U+00A0, encoded in UTF-8 as the two bytes C2 A0).
 *
 * <p>It prints a string ending in each kind of space, shows the result of
 * {@link String#trim()} on both, and finally removes the NO-BREAK SPACE with
 * a regular expression.
 */
public class SpecialSpace {

    /**
     * Runs the demo and writes every intermediate result to standard output.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // A string that ends with an ordinary space (0x20).
        String str1 = "lingyejun ";
        // Encode explicitly as UTF-8 so the bytes do not depend on the
        // platform default charset; every decode below also uses UTF-8.
        byte[] str1Bytes = str1.getBytes(StandardCharsets.UTF_8);
        String space = new String(str1Bytes, StandardCharsets.UTF_8);
        System.out.println("带有32 Space的字符串:" + space);
        System.out.println("使用trim去掉32 -> Space:" + space.trim());

        // Build the same text with the trailing 0x20 replaced by C2 A0.
        // The NO-BREAK SPACE takes two bytes, so the copy is one byte longer;
        // the first of the two bytes overwrites the original trailing space.
        byte[] str2Bytes = new byte[str1Bytes.length + 1];
        System.arraycopy(str1Bytes, 0, str2Bytes, 0, str1Bytes.length);
        str2Bytes[str1Bytes.length - 1] = (byte) 0xC2;
        str2Bytes[str1Bytes.length] = (byte) 0xA0;
        String noBreakSpace = new String(str2Bytes, StandardCharsets.UTF_8);
        System.out.println("带有C2 A0 -> NO-BREAK SPACE的字符串:" + noBreakSpace);
        System.out.println("使用trim无法去掉C2 A0 -> NO-BREAK SPACE:" + noBreakSpace.trim());

        // 32 (0x20) is the space we normally talk about -> SPACE
        byte[] bytes1 = new byte[]{(byte) 0x20};
        String space1 = new String(bytes1, StandardCharsets.UTF_8);
        System.out.println("UTF-8 字符编码号32 -> 0x20 输出:" + space1);

        // 0xC2 = 194, 0xA0 = 160 -> NO-BREAK SPACE
        byte[] bytes2 = new byte[]{(byte) 0xC2, (byte) 0xA0};
        String space2 = new String(bytes2, StandardCharsets.UTF_8);
        System.out.println("UTF-8 字符编码号194 -> 0xC2 160 -> 0xA0 输出:" + space2);

        // Use the decoded NO-BREAK SPACE character itself as the regex and
        // replace every occurrence with the empty string.
        Pattern p = Pattern.compile(space2);
        Matcher m = p.matcher(noBreakSpace);
        String stripped = m.replaceAll("");
        System.out.println("使用正则去掉C2 A0 -> NO-BREAK SPACE:" + stripped);
    }
}