String类源码笔记----JDK1.8

最新推荐文章于 2023-05-11 18:25:54 发布

天上飞的云传奇

最新推荐文章于 2023-05-11 18:25:54 发布

阅读量348

点赞数 1

分类专栏：集合源码+JUC源码文章标签： java 字符串

本文链接：https://blog.csdn.net/weixin_45862170/article/details/119391888

版权

集合源码+JUC源码专栏收录该内容

31 篇文章 0 订阅

订阅专栏

String类源码笔记----JDK1.8

结构

类的私有属性

// Backing array that stores the characters of the string. `final` means the field can be
// assigned only once, so the array reference never changes after construction.
// NOTE(review): `final` does not freeze the array's elements; immutability also relies on
// no code writing to the array afterwards — none of the methods excerpted in these notes does.
// NOTE(review): the original note said final fields are "cached by the JVM" and are therefore
// safe to share between threads — not verifiable from this excerpt; confirm against the JLS.
private final char value[];
// Cached hash code; 0 doubles as "not computed yet" (hashCode() recomputes when it reads 0).
private int hash; // Default to 0
// Stream version identifier used by Java serialization.
private static final long serialVersionUID = -6849794470754667710L;
// Empty ObjectStreamField array. NOTE(review): presumably tells the serialization machinery
// that String has no default serializable fields — confirm against the serialization spec.
private static final ObjectStreamField[] serialPersistentFields =
        new ObjectStreamField[0];

构造方法

（此处原有一张截图，因图片转存失败未能显示。）

 // 1. No-arg constructor: reuses the backing array of the "" literal, so the
 //    new object is an empty string of length 0.
 public String() {this.value = "".value;}

 // 2. Copy constructor: the new object shares original's backing array and cached hash
 //    (no characters are copied). Because strings are immutable, this constructor is
 //    unnecessary unless an explicit copy of original is needed.
 public String(String original) {
        this.value = original.value;
        this.hash = original.hash;
 }

 // 3. Builds a string from a char array. The array is copied, so later changes to the
 //    caller's array do not affect the new string.
 //    Arrays.copyOf allocates and returns a new array, whereas System.arraycopy copies
 //    into a destination array the caller already owns.
 public String(char value[]) {
        this.value = Arrays.copyOf(value, value.length);
  }

 // 4. Builds a string from the sub-range value[offset, offset + count) of a char array.
 //    offset - index of the first char to use; count - number of chars to use.
 //    The range is copied with Arrays.copyOfRange, which returns a new array.
 //    Throws StringIndexOutOfBoundsException when offset or count is negative, or when
 //    offset + count runs past the end of the array.
 public String(char value[], int offset, int count) {
        if (offset < 0) { // a negative start index is rejected
            throw new StringIndexOutOfBoundsException(offset);
        }
        if (count <= 0) {
            if (count < 0) { // a negative length is rejected
                throw new StringIndexOutOfBoundsException(count);
            }
            // count == 0 here. This is not an error path: with an in-range offset
            // the result is simply the empty string.
            if (offset <= value.length) {
                this.value = "".value;
                return;
            }
        }
        // Note: offset or count might be near -1>>>1.
        // Written as "offset > length - count" rather than "offset + count > length"
        // so that the bounds check itself cannot overflow int.
        if (offset > value.length - count) {
            throw new StringIndexOutOfBoundsException(offset + count);
        }
        this.value = Arrays.copyOfRange(value, offset, offset+count); // the actual work: copy the requested range
    }
 // Demo
    char[] chars = {'a','b','c','d'};
    String a = new String(chars,1,2);
    System.out.println(a); // prints bc
 // Note: offset or count might be near -1>>>1.
 // In other words, either argument may be close to the largest int value.
 /*
  	System.out.println(-1>>>1);            prints 2147483647 (0x7fffffff)
  	System.out.println(Integer.MAX_VALUE); prints 2147483647 (0x7fffffff)
 	>>> is the unsigned (zero-fill) right shift; see Java's bitwise operators.
 	An int is 32 bits wide in Java.
 	-1>>>1 shifts the all-ones bit pattern right by one and fills the top bit with 0.
 */

 // 5. Builds a string from the sub-range codePoints[offset, offset + count) of an array of
 //    Unicode code points.
 //    0x0000-0xFFFF are BMP (Basic Multilingual Plane) code points; 0x10000-0x10FFFF are
 //    supplementary code points.
 //    Character.isBmpCodePoint(c): true when c >>> 16 == 0, i.e. c lies in the BMP.
 //    Character.isValidCodePoint(c): true when c is a valid Unicode code point (0x0000-0x10FFFF).
 //    Background reading:
 //    - Overview of encodings (ANSI, GBK, GB2312, UTF-8, GB18030, Unicode): https://blog.csdn.net/ldanduo/article/details/8203532/
 //    - Java char type: https://blog.csdn.net/mazhimazh/article/details/17708001
 //    - Supplementary characters in Java: https://blog.csdn.net/junJZ_2008/article/details/83487036
 public String(int[] codePoints, int offset, int count) {
        if (offset < 0) { // a negative start index is rejected
            throw new StringIndexOutOfBoundsException(offset);
        }
        if (count <= 0) {
            if (count < 0) { // a negative length is rejected
                throw new StringIndexOutOfBoundsException(count);
            }
            // count == 0 with an in-range offset yields the empty string
            if (offset <= codePoints.length) {
                this.value = "".value;
                return;
            }
        }
        // Note: offset or count might be near -1>>>1.
        if (offset > codePoints.length - count) { // overflow-safe form of "offset + count > length"
            throw new StringIndexOutOfBoundsException(offset + count);
        }

        final int end = offset + count;

        // Pass 1: Compute precise size of char[]
        // n starts at one char per code point and grows by one for every
        // supplementary code point found in the range.
        int n = count;
        for (int i = offset; i < end; i++) {
            int c = codePoints[i];
            if (Character.isBmpCodePoint(c)) // BMP code point: fits in a single char
                continue;
            else if (Character.isValidCodePoint(c)) // valid but outside the BMP: needs one extra char
                n++;
            else throw new IllegalArgumentException(Integer.toString(c)); // not a valid code point
        }

        // Pass 2: Allocate and fill in char[]
        final char[] v = new char[n];

        for (int i = offset, j = 0; i < end; i++, j++) {
            int c = codePoints[i];
            if (Character.isBmpCodePoint(c))
                v[j] = (char)c;
            else
                // NOTE(review): toSurrogates is not shown here; presumably it writes the
                // surrogate pair into v[j] and v[j+1]. The extra j++ skips the second slot.
                Character.toSurrogates(c, v, j++);
        }

        this.value = v;
    }
//6. 其它的了解下

实现的接口

implements java.io.Serializable, Comparable<String>, CharSequence

Serializable : 序列化，string可以写到io流中，并可保存整个对象以及用于网络传输
```
private static final long serialVersionUID = -6849794470754667710L;
```

Comparable：比较。compareTo 返回值 > 0 表示当前字符串大于参数字符串，< 0 表示当前字符串小于参数字符串，= 0 表示两者相等

// Comparable<T> declares a single method:
public int compareTo(T o);
// String's implementation of compareTo
/*
Summary of the (lengthy) JDK documentation:
1. Compares the two strings lexicographically, based on the value of each char.
2. At the first index k where the chars differ, the result is
   this.charAt(k) - anotherString.charAt(k).
3. If no char differs within the shorter length (e.g. "abc" vs "abcde"), the result is
   the length difference this.length() - anotherString.length().
*/
public int compareTo(String anotherString) {
        int len1 = value.length;
        int len2 = anotherString.value.length;
        int lim = Math.min(len1, len2); // only indexes valid in both arrays are compared
        char v1[] = value;
        char v2[] = anotherString.value;

        int k = 0;
        while (k < lim) {
            char c1 = v1[k];
            char c2 = v2[k];
            if (c1 != c2) {
                return c1 - c2; // rule 2: difference of the first mismatching chars
            }
            k++;
        }
        return len1 - len2; // rule 3: no mismatch in the common part, so compare lengths
}
测试
    System.out.println("abc".compareTo("abe")); 返回-2 也就是Unicode差值
    System.out.println("abc".compareTo("abcdef"));返回-3 字符串长度差值
    System.out.println("abc".compareTo("abdefaaa"));返回-1 按照代码执行顺序先执行解释2

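CharSequence：一个只读的字符序列接口（定义了 length()、charAt()、subSequence()、toString()）。String、StringBuilder、StringBuffer 都实现了它；其中 String 是不可变的，而 StringBuilder / StringBuffer 是可变的——是否可变取决于具体实现类，而不是 CharSequence 接口本身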

常用方法

charAt(),这个方法可以用到想要取出字符串单个字符的时候

// Returns the char at the given index. Valid indexes run from 0 to length() - 1: the first
// char is at index 0, the next at index 1, and so on, as with array indexing.
// Throws StringIndexOutOfBoundsException when index is negative or not less than the length.
public char charAt(int index) {
        if ((index < 0) || (index >= value.length)) {
            throw new StringIndexOutOfBoundsException(index);
        }
        return value[index]; // read straight from the backing array
}

equals方法重点看JDK帮助文档其实解释挺详细的就是没代码

/*
 * In Object, equals is a plain == (reference) comparison.
 * String overrides it (every class inherits from Object) to test whether
 * two strings hold the same sequence of chars.
 * Returns false for null and for any argument that is not a String.
*/

public boolean equals(Object anObject) {
        if (this == anObject) { // same reference, therefore same content
            return true;
        }
        if (anObject instanceof String) { // only another String can be equal (also false for null)
            String anotherString = (String)anObject;
            int n = value.length;
            if (n == anotherString.value.length) { // different lengths can never be equal
                char v1[] = value;
                char v2[] = anotherString.value;
                int i = 0;
                while (n-- != 0) { // compare char by char, stopping at the first mismatch
                    if (v1[i] != v2[i])
                        return false;
                    i++;
                }
                return true;
            }
        }
        return false;
}
 // Case-insensitive equality: true for the same reference, otherwise requires a non-null
 // argument of the same length whose whole content matches via regionMatches with
 // ignoreCase = true.
 // NOTE(review): regionMatches is not shown here; per the original note it compares chars
 // after Character.toUpperCase(c1) / Character.toUpperCase(c2) — confirm in the JDK source.
  public boolean equalsIgnoreCase(String anotherString) {
        return (this == anotherString) ? true
                : (anotherString != null)
                && (anotherString.value.length == value.length)
                && regionMatches(true, 0, anotherString, 0, value.length);
    }

public boolean startsWith(String prefix) 测试字符串是否以指定字符串开头，为前缀。

重载方法

public boolean startsWith(String prefix,int toffset) 前缀在指定位置开始匹配

public boolean endsWith(String suffix) 这个就有趣了简单的封装方便了使用

// Tests whether the chars of this string starting at index toffset match prefix.
// Returns false (rather than throwing) when toffset is negative or when prefix
// does not fit between toffset and the end of this string.
public boolean startsWith(String prefix, int toffset) {
        char ta[] = value;
        int to = toffset;
        char pa[] = prefix.value;
        int po = 0;
        int pc = prefix.value.length;
        // Note: toffset might be near -1>>>1.
        // "toffset > length - pc" is the overflow-safe form of "toffset + pc > length".
        if ((toffset < 0) || (toffset > value.length - pc)) {
            return false;
        }
        while (--pc >= 0) { // pc counts down the chars still to be compared
            if (ta[to++] != pa[po++]) { // a single mismatching char means no match
                return false;
            }
        }
        return true;
}

// Tests whether this string ends with suffix by delegating to startsWith at the offset
// where the suffix would have to begin. If suffix is longer than this string the offset
// is negative, and startsWith returns false for a negative offset.
public boolean endsWith(String suffix) {
        return startsWith(suffix, value.length - suffix.value.length);
}

hashCode() 方法很重要，它重写了 Object 的 hashCode()；把源码里的注释翻译一下，解释就已经很详细了

// Returns the hash code of this string, computing it on first use and caching it in `hash`.
// Formula: s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1], using int arithmetic.
public int hashCode() {
    int h = hash;
    // A cached value of 0 is treated as "not computed yet"; the empty string skips the loop
    // and keeps hash 0.
    if (h == 0 && value.length > 0) {
        char val[] = value;

        for (int i = 0; i < value.length; i++) {
            h = 31 * h + val[i]; // multiply the running hash by 31, then add the next char
        }
        hash = h; // cache the result for later calls
    }
    return h;
}

public int indexOf(int ch) 返回指定字符第一次出现的索引未找到返回-1

public int indexOf(int ch, int fromIndex) 从指定索引开始找

public int lastIndexOf(int ch) 返回此字符串中最后一次出现的指定字符的索引

public int lastIndexOf(int ch, int fromIndex)

// Returns the index of the first occurrence of the character (code point) ch at or after
// fromIndex, or -1 if there is none. A negative fromIndex is treated as 0; a fromIndex at
// or beyond the length yields -1.
public int indexOf(int ch, int fromIndex) {
        final int max = value.length;
        if (fromIndex < 0) {
            fromIndex = 0;
        } else if (fromIndex >= max) {
            // Note: fromIndex might be near -1>>>1.
            return -1;
        }
        // Character.MIN_SUPPLEMENTARY_CODE_POINT is 0x010000, the smallest supplementary
        // code point; supplementary code points span 0x010000-0x10FFFF.
        if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { // ch is in the BMP (0x0000-0xFFFF inclusive) or negative
            // handle most cases here (ch is a BMP code point or a
            // negative value (invalid code point))
            final char[] value = this.value;
            for (int i = fromIndex; i < max; i++) {
                if (value[i] == ch) {
                    return i;
                }
            }
            return -1;
        } else {
            // Rare case: ch is a supplementary code point. indexOfSupplementary is not shown here.
            return indexOfSupplementary(ch, fromIndex);
        }
    }

public int indexOf(String str) 返回指定子字符串在此字符串中第一次出现的索引，即满足 this.startsWith(str, k) 的最小 k；未找到返回 -1

public int indexOf(String str, int fromIndex)

static int indexOf(char[] source, int sourceOffset, int sourceCount,String target, int fromIndex)

static int indexOf(char[] source, int sourceOffset, int sourceCount,char[] target, int targetOffset, int targetCount,int fromIndex)

// The overload that does the real work: searches a region of source for a region of target.
// source/sourceOffset/sourceCount - the chars being searched, where they start, how many there are.
// target/targetOffset/targetCount - the chars being searched for, where they start, how many there are.
// fromIndex - index, relative to sourceOffset, at which the search begins.
// Returns the index of the first match relative to sourceOffset, or -1 if there is none.
static int indexOf(char[] source, int sourceOffset, int sourceCount,
            char[] target, int targetOffset, int targetCount,
            int fromIndex) {
        // Starting at or past the end: only an empty target can "match", at sourceCount.
        if (fromIndex >= sourceCount) {
            return (targetCount == 0 ? sourceCount : -1);
        }
        if (fromIndex < 0) {
            fromIndex = 0;
        }
        // An empty target matches immediately at the start position.
        if (targetCount == 0) {
            return fromIndex;
        }

        char first = target[targetOffset];
        // Last source index at which a full-length match could still begin.
        int max = sourceOffset + (sourceCount - targetCount);

        for (int i = sourceOffset + fromIndex; i <= max; i++) {
            /* Look for first character. */
            if (source[i] != first) {
                while (++i <= max && source[i] != first);
            }

            /* Found first character, now look at the rest of v2 */
            if (i <= max) {
                int j = i + 1;
                int end = j + targetCount - 1;
                // Advance j and k together while the remaining chars keep matching.
                for (int k = targetOffset + 1; j < end && source[j]
                        == target[k]; j++, k++);

                if (j == end) {
                    /* Found whole string. */
                    return i - sourceOffset;
                }
            }
        }
        return -1;
    }

substring（截取）：通常返回一个新创建的字符串；但当截取范围正好是整个字符串时（例如 beginIndex == 0），直接返回 this

// Returns the substring that begins at beginIndex and extends to the end of this string.
// Throws StringIndexOutOfBoundsException when beginIndex is negative or greater than the length.
public String substring(int beginIndex) {
        if (beginIndex < 0) {
            throw new StringIndexOutOfBoundsException(beginIndex);
        }
        int subLen = value.length - beginIndex;
        if (subLen < 0) {
            // Note: the exception carries the negative length subLen, not beginIndex.
            throw new StringIndexOutOfBoundsException(subLen);
        }
        // The actual work: beginIndex == 0 returns this string itself; otherwise a new
        // String is built from the tail of the backing array.
        return (beginIndex == 0) ? this : new String(value, beginIndex, subLen);
}

// Returns the substring from beginIndex (inclusive) to endIndex (exclusive).
// Throws StringIndexOutOfBoundsException when beginIndex is negative, endIndex is greater
// than the length, or beginIndex is greater than endIndex.
 public String substring(int beginIndex, int endIndex) {
        if (beginIndex < 0) {
            throw new StringIndexOutOfBoundsException(beginIndex);
        }
        if (endIndex > value.length) {
            throw new StringIndexOutOfBoundsException(endIndex);
        }
        int subLen = endIndex - beginIndex;
        if (subLen < 0) {
            throw new StringIndexOutOfBoundsException(subLen);
        }
        // A range covering the whole string returns this string itself; otherwise a new
        // String is built from the requested range.
        return ((beginIndex == 0) && (endIndex == value.length)) ? this
                : new String(value, beginIndex, subLen);
    }

concat 将指定的字符串连接到此字符串的末尾返回一个新字符串

  // Returns this string followed by str. If str is empty, this string itself is returned.
  public String concat(String str) {
        int otherLen = str.length();
        if (otherLen == 0) {
            return this;
        }
        int len = value.length;
        // New array sized for both strings, with this string's chars already copied to the front.
        char buf[] = Arrays.copyOf(value, len + otherLen);
        // NOTE(review): getChars(char[], int) is not shown here; presumably it copies
        // str's chars into buf starting at index len.
        str.getChars(buf, len);
        // NOTE(review): the (char[], boolean) constructor is not shown here; presumably it
        // wraps buf without copying it again — confirm in the JDK source.
        return new String(buf, true);
    }

replace替换这个方法挺有用处的

// Returns a string in which every occurrence of oldChar is replaced by newChar.
// Returns this string itself when oldChar == newChar or when oldChar does not occur.
public String replace(char oldChar, char newChar) {
        if (oldChar != newChar) {
            int len = value.length;
            int i = -1;
            char[] val = value; /* avoid getfield opcode */

            // Scan for the first occurrence of oldChar; i stops at its index, or at len if absent.
            while (++i < len) {
                if (val[i] == oldChar) {
                    break;
                }
            }
            if (i < len) { // i < len means oldChar was found
                char buf[] = new char[len];
                // Chars before index i contain no oldChar, so copy them unchanged.
                for (int j = 0; j < i; j++) {
                    buf[j] = val[j];
                }
                while (i < len) {
                    char c = val[i];
                    buf[i] = (c == oldChar) ? newChar : c; // substitute oldChar, keep every other char
                    i++;
                }
                return new String(buf, true);
            }
        }
        return this;
}
// Replaces only the first substring that matches the regular expression regex.
// The pattern is compiled on every call.
public String replaceFirst(String regex, String replacement) {
      return Pattern.compile(regex).matcher(this).replaceFirst(replacement);
}
// Replaces every substring that matches the regular expression regex.
// The pattern is compiled on every call.
public String replaceAll(String regex, String replacement) {
        return Pattern.compile(regex).matcher(this).replaceAll(replacement);
}

split 字符串分割，返回string[]

// regex: the delimiter, given as a regular expression.
// limit: caps the number of pieces returned; per the JDK documentation, 0 means no cap
//        and trailing empty strings are discarded.
public String[] split(String regex, int limit)
// Equivalent to split(regex, 0).
public String[] split(String regex)

// The split source is hard to read, so it is demonstrated by example instead.
String ce = "icu.wdetian0918.www";
String[] split = ce.split("\\."); // the argument is a regex, so metacharacters such as . and | must be escaped
System.out.println(split.length);
for (String s:split) {
    System.out.println(s);
}
3
icu
wdetian0918
www

join连接好像一直没用到过

// Joins elements into one string, placing delimiter between consecutive elements.
// delimiter: the separator; elements: the pieces to join (varargs, so an array also works).
// Throws NullPointerException when delimiter or elements is null.
public static String join(CharSequence delimiter, CharSequence... elements) {
        Objects.requireNonNull(delimiter);
        Objects.requireNonNull(elements);
        // NOTE(review): StringJoiner's internals are not shown here; per the original note
        // it appends to a StringBuilder underneath — confirm in the JDK source.
        StringJoiner joiner = new StringJoiner(delimiter);
        for (CharSequence cs: elements) {
            joiner.add(cs);
        }
        return joiner.toString();
    }

// names is a List, so this call resolves to the join(CharSequence, Iterable) overload
// rather than the varargs join(CharSequence, CharSequence...).
List<String> names=new ArrayList<String>();
names.add("1");
names.add("2");
names.add("3");
System.out.println(String.join("-", names)); // prints 1-2-3

// A String[] is accepted by the varargs overload.
String[] arrStr=new String[]{"a","b","c"};
System.out.println(String.join("-", arrStr)); // prints a-b-c

大小写转化的方法

toLowerCase 转换为小写
toUpperCase 转化为大写

trim()：去掉字符串首尾所有编码小于等于空格 ' '（U+0020）的字符（空格、制表符、换行符等），字符串中间的空白不受影响

// Returns this string with leading and trailing chars whose value is <= ' ' removed.
// Returns this string itself when there is nothing to remove.
public String trim() {
    int len = value.length;
    int st = 0;
    char[] val = value;    /* avoid getfield opcode */

    // Move st forward past leading chars <= ' '.
    while ((st < len) && (val[st] <= ' ')) {
        st++;
    }
    // Move len backward past trailing chars <= ' '.
    while ((st < len) && (val[len - 1] <= ' ')) {
        len--;
    }
    return ((st > 0) || (len < value.length)) ? substring(st, len) : this;
}
// Demo: the leading and trailing spaces are removed, the inner space is kept.
String ce = "  icu.wdetia n0918.www  ";
System.out.println(ce);
System.out.println(ce.trim());
  icu.wdetia n0918.www  
icu.wdetia n0918.www

toCharArray 这个方法感觉有点用，返回一个新的字符数组大小为String的长度，内容为String的值

// Returns a newly allocated char array whose length equals this string's length and whose
// contents are a copy of this string's chars.
public char[] toCharArray() {
        // Cannot use Arrays.copyOf because of class initialization order issues
        // NOTE(review): the excerpt does not show what the ordering issue is — confirm in the JDK source.
        char result[] = new char[value.length];
        System.arraycopy(value, 0, result, 0, value.length); // copy all chars into the new array
        return result;
}

valueOf() 将其它类型转化为String类型
```
//每个常用基本类型都有
```
（此处原有一张截图，因图片转存失败未能显示。）

或者直接在后面使用

1 + ""
public native String intern(); // 本地方法：返回字符串常量池中与当前字符串内容相同的那个实例，池中没有时先把当前字符串加入常量池