在源码中okhttp-tests 中查看HttpUrlTest 单元测试类,该类包含了各种各样的url 样式,看其构造方法为:
HttpUrl parse(String url) {
return useGet
? HttpUrl.get(url)
: HttpUrl.parse(url);
}
httpUrl.get(url) 方法也是通过paser(url)进行url 的解析:以 parseTrimsAsciiWhitespace方法进行测试:
HttpUrl expected = parse("http://host/");
先看一下pase(url)方法:
Builder parse(@Nullable HttpUrl base, String input) {
int pos = skipLeadingAsciiWhitespace(input, 0, input.length());
int limit = skipTrailingAsciiWhitespace(input, pos, input.length());
// Scheme.
int schemeDelimiterOffset = schemeDelimiterOffset(input, pos, limit);
if (schemeDelimiterOffset != -1) {
if (input.regionMatches(true, pos, "https:", 0, 6)) {
this.scheme = "https";
pos += "https:".length();
} else if (input.regionMatches(true, pos, "http:", 0, 5)) {
this.scheme = "http";
pos += "http:".length();
} else {
throw new IllegalArgumentException("Expected URL scheme 'http' or 'https' but was '"
+ input.substring(0, schemeDelimiterOffset) + "'");
}
} else if (base != null) {
this.scheme = base.scheme;
} else {
throw new IllegalArgumentException(
"Expected URL scheme 'http' or 'https' but no colon was found");
}
// Authority.
boolean hasUsername = false;
boolean hasPassword = false;
int slashCount = slashCount(input, pos, limit);
if (slashCount >= 2 || base == null || !base.scheme.equals(this.scheme)) {
// Read an authority if either:
// * The input starts with 2 or more slashes. These follow the scheme if it exists.
// * The input scheme exists and is different from the base URL's scheme.
//
// The structure of an authority is:
// username:password@host:port
//
// Username, password and port are optional.
// [username[:password]@]host[:port]
pos += slashCount;
authority:
while (true) {
int componentDelimiterOffset = delimiterOffset(input, pos, limit, "@/\\?#");
int c = componentDelimiterOffset != limit
? input.charAt(componentDelimiterOffset)
: -1;
switch (c) {
case '@':
// User info precedes.
if (!hasPassword) {
int passwordColonOffset = delimiterOffset(
input, pos, componentDelimiterOffset, ':');
String canonicalUsername = canonicalize(
input, pos, passwordColonOffset, USERNAME_ENCODE_SET, true, false, false, true,
null);
this.encodedUsername = hasUsername
? this.encodedUsername + "%40" + canonicalUsername
: canonicalUsername;
if (passwordColonOffset != componentDelimiterOffset) {
hasPassword = true;
this.encodedPassword = canonicalize(input, passwordColonOffset + 1,
componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true,
null);
}
hasUsername = true;
} else {
this.encodedPassword = this.encodedPassword + "%40" + canonicalize(input, pos,
componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true,
null);
}
pos = componentDelimiterOffset + 1;
break;
case -1:
case '/':
case '\\':
case '?':
case '#':
// Host info precedes.
int portColonOffset = portColonOffset(input, pos, componentDelimiterOffset);
if (portColonOffset + 1 < componentDelimiterOffset) {
host = canonicalizeHost(input, pos, portColonOffset);
port = parsePort(input, portColonOffset + 1, componentDelimiterOffset);
if (port == -1) {
throw new IllegalArgumentException("Invalid URL port: \""
+ input.substring(portColonOffset + 1, componentDelimiterOffset) + '"');
}
} else {
host = canonicalizeHost(input, pos, portColonOffset);
port = defaultPort(scheme);
}
if (host == null) {
throw new IllegalArgumentException(
INVALID_HOST + ": \"" + input.substring(pos, portColonOffset) + '"');
}
pos = componentDelimiterOffset;
break authority;
}
}
} else {
// This is a relative link. Copy over all authority components. Also maybe the path & query.
this.encodedUsername = base.encodedUsername();
this.encodedPassword = base.encodedPassword();
this.host = base.host;
this.port = base.port;
this.encodedPathSegments.clear();
this.encodedPathSegments.addAll(base.encodedPathSegments());
if (pos == limit || input.charAt(pos) == '#') {
encodedQuery(base.encodedQuery());
}
}
// Resolve the relative path.
int pathDelimiterOffset = delimiterOffset(input, pos, limit, "?#");
resolvePath(input, pos, pathDelimiterOffset);
pos = pathDelimiterOffset;
// Query.
if (pos < limit && input.charAt(pos) == '?') {
int queryDelimiterOffset = delimiterOffset(input, pos, limit, '#');
this.encodedQueryNamesAndValues = queryStringToNamesAndValues(canonicalize(
input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true, false, true, true, null));
pos = queryDelimiterOffset;
}
// Fragment.
if (pos < limit && input.charAt(pos) == '#') {
this.encodedFragment = canonicalize(
input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false, false, false, null);
}
return this;
}
int pos = skipLeadingAsciiWhitespace(input, 0, input.length());
进入这段方法看下:
public static int skipLeadingAsciiWhitespace(String input, int pos, int limit) {
for (int i = pos; i < limit; i++) {
switch (input.charAt(i)) {
case '\t':
case '\n':
case '\f':
case '\r':
case ' ':
continue;
default:
return i;
}
}
return limit;
}
看起来挺简单的,方法名起的挺高大上的,需要我们去学习,此方法是返回第一个不是ASCII 码的空格位置,先不去管他看看第二个方法
int limit = skipTrailingAsciiWhitespace(input, pos, input.length());
进入方法:
/**
* Decrements {@code limit} until {@code input[limit - 1]} is not ASCII whitespace. Stops at
* {@code pos}.
*/
public static int skipTrailingAsciiWhitespace(String input, int pos, int limit) {
for (int i = limit - 1; i >= pos; i--) {
switch (input.charAt(i)) {
case '\t':
case '\n':
case '\f':
case '\r':
case ' ':
continue;
default:
return i + 1;
}
}
return pos;
}
从 for 循环中可以看到和 skipTrailingAsciiWhitespace 方法相反从后面查找,go 一起去看看:
limit 为12 没有问题
schemeDelimiterOffset 继续往下看:
/**
* Returns the index of the ':' in {@code input} that is after scheme characters. Returns -1 if
* {@code input} does not have a scheme that starts at {@code pos}.
*/
private static int schemeDelimiterOffset(String input, int pos, int limit) {
if (limit - pos < 2) return -1;
char c0 = input.charAt(pos);
if ((c0 < 'a' || c0 > 'z') && (c0 < 'A' || c0 > 'Z')) return -1; // Not a scheme start char.
for (int i = pos + 1; i < limit; i++) {
char c = input.charAt(i);
if ((c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z')
|| (c >= '0' && c <= '9')
|| c == '+'
|| c == '-'
|| c == '.') {
continue; // Scheme character. Keep going.
} else if (c == ':') {
return i; // Scheme prefix!
} else {
return -1; // Non-scheme character before the first ':'.
}
}
return -1; // No ':'; doesn't start with a scheme.
}
看到这部分
} else if (c == ':') { return i; // Scheme prefix! } 可以知道该方法是获取 url ‘:’之前的index
// Authority. boolean hasUsername = false; boolean hasPassword = false;
请求中 Authority 添加认证信息的格式为 username:password 例如:
http://username:password@www.my_site.com
这种格式
接下来继续往下走 slashCount(input, pos, limit);
/** Returns the number of '/' and '\' slashes in {@code input}, starting at {@code pos}. */
private static int slashCount(String input, int pos, int limit) {
int slashCount = 0;
while (pos < limit) {
char c = input.charAt(pos);
if (c == '\\' || c == '/') {
slashCount++;
pos++;
} else {
break;
}
}
return slashCount;
}
从注释看 返回 斜杠 ‘\’ 、‘\’ 的个数 ,进去看if 块中的代码 else 中可以看到如果遇到非 '/' 返回个数
/**
* Returns the index of the first character in {@code input} that contains a character in {@code
* delimiters}. Returns limit if there is no such character.
*/
public static int delimiterOffset(String input, int pos, int limit, String delimiters) {
for (int i = pos; i < limit; i++) {
if (delimiters.indexOf(input.charAt(i)) != -1) return i;
}
return limit;
}
遍历url 如果出现 delimiters ("@/\\?#")中的任何字符,返回会该字符的位置的前一个
portColonOffset
/** Finds the first ':' in {@code input}, skipping characters between square braces "[...]". */
private static int portColonOffset(String input, int pos, int limit) {
for (int i = pos; i < limit; i++) {
switch (input.charAt(i)) {
case '[':
while (++i < limit) {
if (input.charAt(i) == ']') break;
}
break;
case ':':
return i;
}
}
return limit; // No colon.
}
获取 类似 “http://[:0000:0000:0000:0000:0000:0000:0000:0001]” 这种格式 以“]” 结尾的长度 应该是处理ipv6格式,不太常见先跳过
接下来进入 canonicalizeHost 这个方法:
static String percentDecode(String encoded, int pos, int limit, boolean plusIsSpace) {
for (int i = pos; i < limit; i++) {
char c = encoded.charAt(i);
if (c == '%' || (c == '+' && plusIsSpace)) {
// Slow path: the character at i requires decoding!
Buffer out = new Buffer();
out.writeUtf8(encoded, pos, i);
percentDecode(out, encoded, i, limit, plusIsSpace);
return out.readUtf8();
}
}
// Fast path: no characters in [pos..limit) required decoding.
return encoded.substring(pos, limit);
}
我们进入 percentDecode 这个方法看下,
static void percentDecode(Buffer out, String encoded, int pos, int limit, boolean plusIsSpace) {
int codePoint;
for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
codePoint = encoded.codePointAt(i);
if (codePoint == '%' && i + 2 < limit) {
int d1 = decodeHexDigit(encoded.charAt(i + 1));
int d2 = decodeHexDigit(encoded.charAt(i + 2));
if (d1 != -1 && d2 != -1) {
out.writeByte((d1 << 4) + d2);
i += 2;
continue;
}
} else if (codePoint == '+' && plusIsSpace) {
out.writeByte(' ');
continue;
}
out.writeUtf8CodePoint(codePoint);
}
}
方法中的i+2 < limit 判断% 号后面是否还有其他字符,decodeHexDigit 用来转化16进制 ,(d1 << 4) +d2 返编码%x 此处计算可参考 位计算相关知识
canonicalizeHost 方法判断主机名是否有效
int pathDelimiterOffset = delimiterOffset(input, pos, limit, "?#");
resolvePath(input, pos, pathDelimiterOffset);
pos = pathDelimiterOffset;
private void resolvePath(String input, int pos, int limit) {
// Read a delimiter.
if (pos == limit) {
// Empty path: keep the base path as-is.
return;
}
char c = input.charAt(pos);
if (c == '/' || c == '\\') {
// Absolute path: reset to the default "/".
encodedPathSegments.clear();
encodedPathSegments.add("");
pos++;
} else {
// Relative path: clear everything after the last '/'.
encodedPathSegments.set(encodedPathSegments.size() - 1, "");
}
// Read path segments.
for (int i = pos; i < limit; ) {
int pathSegmentDelimiterOffset = delimiterOffset(input, i, limit, "/\\");
boolean segmentHasTrailingSlash = pathSegmentDelimiterOffset < limit;
push(input, i, pathSegmentDelimiterOffset, segmentHasTrailingSlash, true);
i = pathSegmentDelimiterOffset;
if (segmentHasTrailingSlash) i++;
}
}
解析path 路径
这个类中关键部分基本上就是这些方法了