String是平常使用最多的结构之一,String提供了很多方便的接口来处理字符串,同时其支持正则表达式,所以字符串处理能力很强。下面我们简单介绍下String的内部实现,重点关注常用的一些方法的实现。String的一些方法涉及到字符编码,这里不介绍字符编码相关的内容。
//String类是final的,即不可继承
public final class String implements java.io.Serializable, Comparable<String>, CharSequence {
private final char value[];// Backing storage: the characters of this string are held in this array.
private int hash;// NOTE(review): presumably the cached hash code of this string (hashCode() is not shown in this section) — confirm. It is copied from the source string by the String(String) constructor.
//实现空字符串
/**
 * Creates an empty string backed by a zero-length char array.
 */
public String() {
    this.value = new char[] {};
}
//用字符串构造字符串的
public String(String original) {
this.value = original.value;
this.hash = original.hash;
}
//用字符数组构造字符串
/**
 * Creates a string holding the characters of the given array.
 *
 * <p>The array is copied, so later changes to the caller's array do not
 * affect the new string.
 *
 * @param value the initial characters
 * @throws NullPointerException if {@code value} is {@code null}
 */
public String(char[] value) {
    // A private copy keeps the caller from aliasing the backing array.
    this.value = value.clone();
}
//如果以字节数组构造,则涉及到字符编码的问题
/**
 * Creates a string by decoding an entire byte array with the given charset.
 *
 * <p>Delegates to the four-argument constructor (not shown in this section)
 * with offset {@code 0} and length {@code bytes.length}.
 *
 * @param bytes   the bytes to decode
 * @param charset the charset handed to the delegate constructor
 * @throws NullPointerException if {@code bytes} is {@code null}
 */
public String(byte[] bytes, Charset charset) {
    this(bytes, 0, bytes.length, charset);
}
//获取长度信息,因为数组提供了长度信息,所以直接获取,否则可以提供一个长度信息
/**
 * Returns the number of chars in this string, which is simply the length
 * of the backing array.
 *
 * @return the length of the backing array
 */
public int length() {
    final char[] chars = value;
    return chars.length;
}
//判断是否为空
/**
 * Tells whether this string contains no chars.
 *
 * @return {@code true} if the backing array has length zero
 */
public boolean isEmpty() {
    final int size = value.length;
    return size == 0;
}
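//Returns the char at the given index. The char is returned by value and the backing array is never exposed, so this method cannot be used to modify the string.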
/**
 * Returns the char stored at the given position.
 *
 * @param index zero-based position of the wanted char
 * @return the char at {@code index}
 * @throws StringIndexOutOfBoundsException if {@code index} is negative or
 *         not less than the length of this string
 */
public char charAt(int index) {
    if (index >= 0 && index < value.length) {
        return value[index];
    }
    throw new StringIndexOutOfBoundsException(index);
}
//比较两字符串
/**
 * Compares this string with another object for equality.
 *
 * <p>The result is {@code true} when the argument is this very instance, or
 * when it is a {@code String} whose backing array has the same length and
 * holds the same char at every index.
 *
 * @param anObject the object to compare against; may be {@code null}
 * @return {@code true} if the argument represents the same char sequence
 */
public boolean equals(Object anObject) {
    // Same reference: nothing to compare.
    if (this == anObject) {
        return true;
    }
    // Also rejects null, since instanceof is false for null.
    if (!(anObject instanceof String)) {
        return false;
    }
    final char[] mine = value;
    final char[] theirs = ((String) anObject).value;
    if (mine.length != theirs.length) {
        return false;
    }
    for (int idx = 0; idx < mine.length; idx++) {
        if (mine[idx] != theirs[idx]) {
            return false;
        }
    }
    return true;
}
//比较两字符串的大小,比较过程和equals类似
/**
 * Compares this string with another one, char by char.
 *
 * <p>The two strings are scanned over their common prefix length. At the
 * first index where the chars differ, the difference of those two chars is
 * returned. If no such index exists, the difference of the lengths is
 * returned.
 *
 * @param anotherString the string to compare against
 * @return a negative value, zero, or a positive value as this string is
 *         less than, equal to, or greater than the argument
 * @throws NullPointerException if {@code anotherString} is {@code null}
 */
public int compareTo(String anotherString) {
    final char[] left = value;
    final char[] right = anotherString.value;
    final int shared = Math.min(left.length, right.length);
    for (int pos = 0; pos < shared; pos++) {
        // chars are promoted to int, so the subtraction cannot overflow.
        final int diff = left[pos] - right[pos];
        if (diff != 0) {
            return diff;
        }
    }
    return left.length - right.length;
}
//忽略大小写的一种判断方法
/**
 * Compares two strings while ignoring case differences.
 *
 * <p>For each index of the common prefix, two chars are treated as equal
 * if they are identical, or identical after {@link Character#toUpperCase(char)},
 * or identical after additionally applying
 * {@link Character#toLowerCase(char)} to the upper-cased chars. The first
 * pair that still differs after both conversions decides the result;
 * otherwise the lengths decide.
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return a negative value, zero, or a positive value as {@code s1} orders
 *         before, equal to, or after {@code s2}, ignoring case
 * @throws NullPointerException if either argument is {@code null}
 */
public int compare(String s1, String s2) {
    final int n1 = s1.length();
    final int n2 = s2.length();
    final int shared = Math.min(n1, n2);
    for (int pos = 0; pos < shared; pos++) {
        char a = s1.charAt(pos);
        char b = s2.charAt(pos);
        if (a == b) {
            continue;
        }
        // First attempt: compare the upper-cased forms.
        a = Character.toUpperCase(a);
        b = Character.toUpperCase(b);
        if (a == b) {
            continue;
        }
        // Second attempt: lower-case the already upper-cased chars.
        a = Character.toLowerCase(a);
        b = Character.toLowerCase(b);
        if (a != b) {
            return a - b;
        }
    }
    return n1 - n2;
}
//Tests whether a region of this string equals a region of another string (not a whole-string equality check)
/**
 * Tests whether a region of this string is equal to a region of another
 * string.
 *
 * <p>Returns {@code false} if either offset is negative, or if either
 * region would extend past the end of its string. A region of
 * non-positive length that passes those checks matches trivially.
 *
 * @param toffset start of the region in this string
 * @param other   the string providing the second region
 * @param ooffset start of the region in {@code other}
 * @param len     number of chars to compare
 * @return {@code true} if the two regions hold the same chars
 * @throws NullPointerException if {@code other} is {@code null}
 */
public boolean regionMatches(int toffset, String other, int ooffset,
        int len) {
    final char[] self = value;
    final char[] that = other.value;
    // The long arithmetic keeps "length - len" from overflowing when len is
    // negative and large in magnitude.
    final boolean outOfRange = (toffset < 0) || (ooffset < 0)
            || (toffset > (long) self.length - len)
            || (ooffset > (long) that.length - len);
    if (outOfRange) {
        return false;
    }
    for (int k = 0; k < len; k++) {
        if (self[toffset + k] != that[ooffset + k]) {
            return false;
        }
    }
    return true;
}
//判断是否以字串开始
/**
 * Tests whether the chars of this string beginning at {@code toffset}
 * start with the given prefix.
 *
 * <p>Returns {@code false} if {@code toffset} is negative or if the prefix
 * would not fit between {@code toffset} and the end of this string.
 *
 * @param prefix  the prefix to look for
 * @param toffset index in this string where the comparison starts
 * @return {@code true} if the prefix matches at that position
 * @throws NullPointerException if {@code prefix} is {@code null}
 */
public boolean startsWith(String prefix, int toffset) {
    final char[] haystack = value;
    final char[] needle = prefix.value;
    final int needleLen = needle.length;
    if (toffset < 0 || toffset > haystack.length - needleLen) {
        return false;
    }
    for (int k = 0; k < needleLen; k++) {
        if (haystack[toffset + k] != needle[k]) {
            return false;
        }
    }
    return true;
}
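//Tests whether this string ends with the given suffix; implemented by calling startsWith at offset (length - suffix length)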
/**
 * Tests whether this string ends with the given suffix.
 *
 * <p>Implemented as a {@code startsWith} check at the offset where the
 * suffix would have to begin. If the suffix is longer than this string the
 * offset is negative, which {@code startsWith} rejects.
 *
 * @param suffix the suffix to look for
 * @return {@code true} if this string ends with {@code suffix}
 * @throws NullPointerException if {@code suffix} is {@code null}
 */
public boolean endsWith(String suffix) {
    final int startAt = value.length - suffix.value.length;
    return startsWith(suffix, startAt);
}
//查找字符
/**
 * Finds the first occurrence of a character at or after a given index.
 *
 * <p>A negative {@code fromIndex} is treated as {@code 0}; a
 * {@code fromIndex} at or beyond the length yields {@code -1}. Values of
 * {@code ch} below {@link Character#MIN_SUPPLEMENTARY_CODE_POINT} are
 * searched for directly in the backing array; larger values are handed to
 * {@code indexOfSupplementary} (not shown in this section).
 *
 * @param ch        the character (code point) to search for
 * @param fromIndex index at which the search starts
 * @return index of the first match, or {@code -1} if there is none
 */
public int indexOf(int ch, int fromIndex) {
    final char[] chars = this.value;
    final int end = chars.length;
    if (fromIndex >= end) {
        return -1;
    }
    // Clamp negative start positions to the beginning of the string.
    final int start = Math.max(fromIndex, 0);
    if (ch >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
        return indexOfSupplementary(ch, start);
    }
    for (int pos = start; pos < end; pos++) {
        if (chars[pos] == ch) {
            return pos;
        }
    }
    return -1;
}
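//Strips leading and trailing chars whose code is <= ' ' (the space character and everything below it); chars in the middle are untouched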
/**
 * Returns this string with leading and trailing chars whose code is
 * {@code <= ' '} removed.
 *
 * @return the stripped substring, or this same instance when there is
 *         nothing to strip at either end
 */
public String trim() {
    final char[] chars = value;
    int start = 0;
    int end = chars.length;
    // Skip over leading chars that are <= ' '.
    while (start < end && chars[start] <= ' ') {
        start++;
    }
    // Walk back over trailing chars that are <= ' '.
    while (start < end && chars[end - 1] <= ' ') {
        end--;
    }
    if (start == 0 && end == chars.length) {
        return this;
    }
    return substring(start, end);
}
//返回字符数组
/**
 * Returns the chars of this string as a newly allocated array.
 *
 * <p>The result is a copy, so modifying it does not affect this string.
 *
 * @return a fresh array with the same length and contents as the backing
 *         array
 */
public char[] toCharArray() {
    return value.clone();
}
//执行字符串分割操作
/**
 * Splits this string around occurrences of the given delimiter expression.
 *
 * <p>A fast path that avoids the regex engine is used when {@code regex} is
 * either a single char that is not one of the regex metacharacters listed in
 * the literal below, or a backslash followed by a char that is neither an
 * ASCII digit nor an ASCII letter, and in both cases the delimiter char is
 * outside the surrogate range. Every other {@code regex} is compiled with
 * {@code Pattern.compile} and split by the resulting pattern.
 *
 * <p>On the fast path: a positive {@code limit} caps the result at
 * {@code limit} pieces, the last one holding the whole remainder; a
 * {@code limit} of zero applies no cap and drops trailing empty strings; a
 * negative {@code limit} applies no cap and keeps trailing empty strings.
 *
 * @param regex the delimiter expression
 * @param limit controls the number of pieces, as described above
 * @return the pieces of this string, in order
 */
public String[] split(String regex, int limit) {
char ch = 0;
//Fast-path test. ch is assigned the delimiter char as a side effect of evaluating the condition.
//Each ((ch-lo)|(hi-ch)) < 0 term is true exactly when ch lies outside [lo, hi].
if (((regex.value.length == 1 &&
".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
(regex.length() == 2 &&
regex.charAt(0) == '\\' &&
(((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
((ch-'a')|('z'-ch)) < 0 &&
((ch-'A')|('Z'-ch)) < 0)) &&
(ch < Character.MIN_HIGH_SURROGATE ||
ch > Character.MAX_LOW_SURROGATE))
{
int off = 0;
int next = 0;
boolean limited = limit > 0;
ArrayList<String> list = new ArrayList<>();//collects the pieces produced by the split
while ((next = indexOf(ch, off)) != -1) {//find the next delimiter at or after off
if (!limited || list.size() < limit - 1) {
list.add(substring(off, next));//piece between the previous delimiter and this one
off = next + 1;
} else {//limit reached: the last slot takes the whole remainder
list.add(substring(off, value.length));
off = value.length;
break;
}
}
//delimiter never found: the result is this string alone
if (off == 0)
return new String[]{this};
//append the remaining tail, unless the limit branch above already filled the last slot
if (!limited || list.size() < limit)
list.add(substring(off, value.length));
int resultSize = list.size();
//limit == 0: drop trailing empty strings
if (limit == 0)
while (resultSize > 0 && list.get(resultSize - 1).length() == 0)
resultSize--;
String[] result = new String[resultSize];
return list.subList(0, resultSize).toArray(result);//copy the kept pieces into a String[]
}
//not eligible for the fast path: delegate to the regex engine
return Pattern.compile(regex).split(this, limit);
}