String源码简析(下):
常用的方法:
1.substring(int beginIndex, int endIndex):得到一个子串,范围是左闭右开区间[beginIndex,endIndex)
/**
 * Returns the part of this string covering the half-open range
 * {@code [beginIndex, endIndex)}.
 *
 * @param beginIndex index of the first character to keep (inclusive)
 * @param endIndex   index just past the last character to keep (exclusive)
 * @return this same instance when the range spans the whole string,
 *         otherwise a new String built over the same backing array
 * @throws StringIndexOutOfBoundsException if {@code beginIndex} is negative,
 *         {@code endIndex} exceeds the length, or the range is inverted
 */
public String substring(int beginIndex, int endIndex) {
    if (beginIndex < 0) {
        throw new StringIndexOutOfBoundsException(beginIndex);
    }
    if (endIndex > count) {
        throw new StringIndexOutOfBoundsException(endIndex);
    }
    // Compare the indices directly; testing the difference for negativity
    // could be fooled by integer overflow.
    if (beginIndex > endIndex) {
        throw new StringIndexOutOfBoundsException(endIndex - beginIndex);
    }

    // The full range needs no new object.
    boolean coversEverything = (beginIndex == 0) && (endIndex == count);
    if (coversEverything) {
        return this;
    }

    int subLength = endIndex - beginIndex;
    return new String(offset + beginIndex, subLength, value);
}
2.concat(String str) :将参数字符串添加到该字符串尾部。
/**
 * Appends {@code str} to the end of this string.
 *
 * @param str the text to append
 * @return this same instance when {@code str} is empty, otherwise a new
 *         String holding this string's characters followed by those of
 *         {@code str}
 */
public String concat(String str) {
    final int appendedLength = str.length();
    if (appendedLength == 0) {
        return this;
    }

    final int totalLength = count + appendedLength;
    char[] joined = new char[totalLength];
    // Own characters first, then the argument's right after them.
    getChars(0, count, joined, 0);
    str.getChars(0, appendedLength, joined, count);
    return new String(0, totalLength, joined);
}
3.replace(char oldChar, char newChar):用newChar替换字符串中所有的oldChar字符
/**
 * Replaces every occurrence of {@code oldChar} with {@code newChar}.
 *
 * @param oldChar the character to look for
 * @param newChar the character to put in its place
 * @return this same instance when both characters are equal or
 *         {@code oldChar} does not occur, otherwise a new String with the
 *         substitutions applied
 */
public String replace(char oldChar, char newChar) {
    if (oldChar == newChar) {
        return this;
    }

    // Local copies of the fields, as in the original ("avoid getfield opcode").
    final char[] source = value;
    final int base = offset;
    final int length = count;

    // Locate the first occurrence; everything before it is unchanged.
    int firstHit = 0;
    while (firstHit < length && source[base + firstHit] != oldChar) {
        firstHit++;
    }
    if (firstHit == length) {
        return this;
    }

    char[] replaced = new char[length];
    System.arraycopy(source, base, replaced, 0, firstHit);
    for (int pos = firstHit; pos < length; pos++) {
        char current = source[base + pos];
        replaced[pos] = (current == oldChar) ? newChar : current;
    }
    return new String(0, length, replaced);
}
4.matches(String regex):判断该字符串是否匹配给定的正则表达式
/**
 * Tells whether this whole string matches the given regular expression.
 *
 * @param regex the regular expression to compile and match against
 * @return {@code true} if this string matches {@code regex}
 */
public boolean matches(String regex) {
    // Spelled-out form of Pattern.matches(regex, this).
    Pattern compiled = Pattern.compile(regex);
    return compiled.matcher(this).matches();
}
5.contains(CharSequence s):判断该字符串是否包含字符序列s(实际是将s转成String后调用indexOf方法)
/**
 * Tells whether this string contains the given character sequence.
 *
 * @param s the sequence to search for; it is converted with {@code toString()}
 * @return {@code true} if {@code indexOf} finds the sequence
 */
public boolean contains(CharSequence s) {
    final String needle = s.toString();
    return indexOf(needle) >= 0;
}
6.split(String regex, int limit)/split(String regex):用正则对该字符串进行分割,limit控制结果数组的长度:limit>0时最多分割limit-1次,数组长度不超过limit;limit为0(即split(String regex)的情况)时不限制分割次数,并丢弃结尾的空字符串
/**
 * Splits this string around matches of the given regular expression.
 *
 * @param regex the delimiting regular expression
 * @param limit passed through unchanged to {@code Pattern.split}
 * @return the pieces produced by {@code Pattern.split}
 */
public String[] split(String regex, int limit) {
    Pattern delimiter = Pattern.compile(regex);
    return delimiter.split(this, limit);
}
/**
 * Splits this string around matches of the given regular expression,
 * delegating to {@code split(regex, 0)}.
 *
 * @param regex the delimiting regular expression
 * @return the pieces produced by the two-argument overload with limit 0
 */
public String[] split(String regex) {
    final int defaultLimit = 0;
    return split(regex, defaultLimit);
}
7.toLowerCase(Locale locale):将字符串转成小写的核心代码,但更常用的是它的重载方法toLowerCase().
/**
 * Converts this string to lower case using the rules of the given locale.
 *
 * <p>The method works in two phases: it first scans for the first character
 * whose lower-case form differs from itself and returns {@code this}
 * unchanged if there is none; otherwise it copies the untouched prefix and
 * converts the remainder code point by code point, growing the output
 * buffer when a mapping yields more chars than it consumed.
 *
 * @param locale the locale whose case-mapping rules apply
 * @return this same instance when no character needs changing, otherwise a
 *         new lower-cased String
 * @throws NullPointerException if {@code locale} is {@code null}
 */
public String toLowerCase(Locale locale) {
    if (locale == null) {
        throw new NullPointerException();
    }

    int firstUpper;

    /* Now check if there are any characters that need to be changed. */
    scan: {
        for (firstUpper = 0 ; firstUpper < count; ) {
            char c = value[offset+firstUpper];
            if ((c >= Character.MIN_HIGH_SURROGATE) &&
                (c <= Character.MAX_HIGH_SURROGATE)) {
                // High surrogate: examine the full code point, then step
                // over however many chars it occupies.
                int supplChar = codePointAt(firstUpper);
                if (supplChar != Character.toLowerCase(supplChar)) {
                    break scan;
                }
                firstUpper += Character.charCount(supplChar);
            } else {
                if (c != Character.toLowerCase(c)) {
                    break scan;
                }
                firstUpper++;
            }
        }
        // The loop ran to the end without a break: nothing to convert.
        return this;
    }

    char[] result = new char[count];
    int resultOffset = 0; /* result may grow, so i+resultOffset
                           * is the write location in result */

    /* Just copy the first few lowerCase characters. */
    System.arraycopy(value, offset, result, 0, firstUpper);

    String lang = locale.getLanguage();
    // Compare by value, not by reference, so the check does not depend on
    // getLanguage() handing back interned strings.
    boolean localeDependent =
        "tr".equals(lang) || "az".equals(lang) || "lt".equals(lang);
    char[] lowerCharArray;
    int lowerChar;
    int srcChar;
    int srcCount;
    for (int i = firstUpper; i < count; i += srcCount) {
        srcChar = (int)value[offset+i];
        if ((char)srcChar >= Character.MIN_HIGH_SURROGATE &&
            (char)srcChar <= Character.MAX_HIGH_SURROGATE) {
            srcChar = codePointAt(i);
            srcCount = Character.charCount(srcChar);
        } else {
            srcCount = 1;
        }
        if (localeDependent || srcChar == '\u03A3') { // GREEK CAPITAL LETTER SIGMA
            lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale);
        } else {
            lowerChar = Character.toLowerCase(srcChar);
        }
        if ((lowerChar == Character.ERROR) ||
            (lowerChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {
            if (lowerChar == Character.ERROR) {
                // ERROR result: the mapping is fetched as a char array instead.
                lowerCharArray =
                    ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
            } else if (srcCount == 2) {
                // Supplementary source and supplementary result: write in
                // place and adjust the offset by the size difference.
                resultOffset += Character.toChars(lowerChar, result, i + resultOffset) - srcCount;
                continue;
            } else {
                lowerCharArray = Character.toChars(lowerChar);
            }

            /* Grow result if needed */
            int mapLen = lowerCharArray.length;
            if (mapLen > srcCount) {
                char[] result2 = new char[result.length + mapLen - srcCount];
                System.arraycopy(result, 0, result2, 0,
                    i + resultOffset);
                result = result2;
            }
            for (int x=0; x<mapLen; ++x) {
                result[i+resultOffset+x] = lowerCharArray[x];
            }
            resultOffset += (mapLen - srcCount);
        } else {
            result[i+resultOffset] = (char)lowerChar;
        }
    }
    return new String(0, count+resultOffset, result);
}
<hr />
/**
 * Converts this string to lower case using the default locale.
 *
 * @return the result of {@code toLowerCase(Locale.getDefault())}
 */
public String toLowerCase() {
    final Locale defaultLocale = Locale.getDefault();
    return toLowerCase(defaultLocale);
}
8.trim():去掉首尾的空白字符(所有不大于' '的字符,不仅仅是空格)
/**
 * Strips leading and trailing characters that compare {@code <= ' '}.
 *
 * @return this same instance when nothing is stripped, otherwise the
 *         substring between the first and last retained characters
 */
public String trim() {
    // Local copies of the fields, as in the original ("avoid getfield opcode").
    final char[] chars = value;
    final int base = offset;

    int start = 0;
    int end = count;

    // Advance past leading characters <= ' '.
    while ((start < end) && (chars[base + start] <= ' ')) {
        start++;
    }
    // Pull back over trailing characters <= ' '.
    while ((start < end) && (chars[base + end - 1] <= ' ')) {
        end--;
    }

    if ((start == 0) && (end == count)) {
        return this;
    }
    return substring(start, end);
}
9.toString():直接返回该字符串对象本身
/**
 * Returns this object itself, without creating a copy.
 *
 * @return {@code this}
 */
public String toString() {
return this;
}
其他方法:
1.substring(int beginIndex):得到一个子串
/**
 * Returns the part of this string from {@code beginIndex} to the end.
 *
 * @param beginIndex index of the first character to keep (inclusive)
 * @return the result of {@code substring(beginIndex, count)}
 */
public String substring(int beginIndex) {
    final int endIndex = count;
    return substring(beginIndex, endIndex);
}
2.subSequence(int beginIndex, int endIndex):得到一个子字符序列
/**
 * Returns the range {@code [beginIndex, endIndex)} as a {@code CharSequence};
 * the work is delegated to {@code substring}.
 *
 * @param beginIndex start of the range (inclusive)
 * @param endIndex   end of the range (exclusive)
 * @return the String produced by {@code substring(beginIndex, endIndex)}
 */
public CharSequence subSequence(int beginIndex, int endIndex) {
    final CharSequence slice = substring(beginIndex, endIndex);
    return slice;
}
3.toCharArray():将String转成数组,一个特殊版本的getChars
/**
 * Copies this string's characters into a newly allocated array.
 *
 * @return a new {@code char[]} of length {@code count} filled by
 *         {@code getChars}
 */
public char[] toCharArray() {
    final int length = count;
    char[] copy = new char[length];
    getChars(0, length, copy, 0);
    return copy;
}
注意点:
- String类中的value是不可变的,所以对一个字符串的增删改操作都会创建一个新的字符串对象,比如:
// Anti-pattern demo: each += in the loop produces a new String object rather than modifying the existing one.
String a = new String("a");
for(int i=0;i<1000;i++)
a += "a";
// Per the original note, this creates 1002 objects, none of which is reclaimed before the GC runs - a huge waste of memory!
所以记住一个原则:
若一个字符序列会经常被增删改,那么请根据是否并发选择StringBuffer(多线程,线程安全)或StringBuilder(单线程)
若一个字符序列很少被增删改,那么请用String
- 从内存角度可以更加深刻的分析String:请看这篇博文