隔了两天没写,真是不爽啊。都怪那个死freebsd,争取快点驾驭它。好了,下面来看看前面那个解析头部的函数里面的一些细节函数。
首先,在讲前面提到的细节函数之前,先看看HttpHeader这个类,它是一个用来存放头部信息的类。
//自动增长HttpHeader数组大小,这个类很简单,先看看它的类成员以及它的构造函数:分别用了char[] name和char[] value来存放头部信息
package org.apache.catalina.connector.http;
/**
* HTTP header enum type.
*
* @author Remy Maucherat
* @version $Revision: 466595 $ $Date: 2006-10-21 23:24:41 +0100 (Sat, 21 Oct 2006) $
* @deprecated
*/
final class HttpHeader {
// -------------------------------------------------------------- Constants
public static final int INITIAL_NAME_SIZE = 32;
public static final int INITIAL_VALUE_SIZE = 64;
public static final int MAX_NAME_SIZE = 128;
public static final int MAX_VALUE_SIZE = 4096;
// ----------------------------------------------------------- Constructors
/**
 * Creates an empty header: a name buffer of {@link #INITIAL_NAME_SIZE}
 * chars and a value buffer of {@link #INITIAL_VALUE_SIZE} chars, with both
 * end marks set to 0.
 */
public HttpHeader() {
    this(new char[INITIAL_NAME_SIZE], 0,
         new char[INITIAL_VALUE_SIZE], 0);
}
/**
 * Creates a header backed by the supplied buffers. The arrays are stored
 * by reference (no copy is made), so later changes to them are visible
 * through this header.
 *
 * @param name     buffer holding the header name
 * @param nameEnd  end mark of the name within {@code name}
 * @param value    buffer holding the header value
 * @param valueEnd end mark of the value within {@code value}
 */
public HttpHeader(char[] name, int nameEnd, char[] value, int valueEnd) {
    // Buffers first, then their end marks.
    this.name = name;
    this.value = value;
    this.nameEnd = nameEnd;
    this.valueEnd = valueEnd;
}
/**
 * Creates a header from strings. The name is stored lower-cased; the value
 * is stored unchanged.
 *
 * @param name  header name (any case)
 * @param value header value
 */
public HttpHeader(String name, String value) {
    // Fold case with a fixed locale: the no-arg toLowerCase() uses the JVM
    // default locale, which e.g. in Turkish maps 'I' to dotless 'ı'.
    this.name = name.toLowerCase(java.util.Locale.ENGLISH).toCharArray();
    // Lower-casing can change the number of chars (e.g. U+0130), so take the
    // end mark from the array actually stored, not from the input string.
    this.nameEnd = this.name.length;
    this.value = value.toCharArray();
    this.valueEnd = this.value.length;
}
// ----------------------------------------------------- Instance Variables
public char[] name;
public int nameEnd;
public char[] value;
public int valueEnd;
protected int hashCode = 0;
第一个细节:
//接着来看看parseHeaders函数中的allocateHeader函数,这个函数其实是为parseHeaders函数分配一个HttpHeader对象。通过设定一个默认的HttpHeader池大小,
//如果池中的HttpHeader对象不够,那么自动增大HttpHeader池大小
package org.apache.catalina.connector.http.HttpRequestImpl;
/**
* Headers pool.
*/
protected HttpHeader[] headerPool = new HttpHeader[INITIAL_POOL_SIZE];
/**
 * Hands out the pool slot at index {@code nextHeader}.
 * <p>
 * When {@code nextHeader} has reached the end of the pool, the pool is
 * first enlarged by {@code POOL_SIZE_INCREMENT} entries and the existing
 * entries are carried over. A slot that has never been used is filled with
 * a fresh {@link HttpHeader} before being returned. This method does not
 * change {@code nextHeader}.
 *
 * @return the HttpHeader stored at the current pool position
 */
HttpHeader allocateHeader() {
    if (nextHeader == headerPool.length) {
        // Pool exhausted: move the existing entries into a larger array.
        HttpHeader[] grownPool =
            new HttpHeader[headerPool.length + POOL_SIZE_INCREMENT];
        System.arraycopy(headerPool, 0, grownPool, 0, nextHeader);
        headerPool = grownPool;
    }
    HttpHeader slot = headerPool[nextHeader];
    if (slot == null) {
        // First use of this position: create the header lazily.
        slot = new HttpHeader();
        headerPool[nextHeader] = slot;
    }
    return slot;
}
第二个细节:大体做了三件事,
1,过滤掉换行符;
2,进行解析头部的key部分,这一部分的解析跟之前的差不多;
3,进行解析头部的value部分,这一部分需要注意的一点是:value有可能是跨行的,也就是以多行的形式出现
package org.apache.catalina.connector.http.SocketInputStream;
// -------------------------------------------------------------- Constants
/**
* CR.
*/
private static final byte CR = (byte) '\r';
/**
* LF.
*/
private static final byte LF = (byte) '\n';
/**
* SP.
*/
private static final byte SP = (byte) ' ';
/**
* HT.
*/
private static final byte HT = (byte) '\t';
/**
* COLON.
*/
private static final byte COLON = (byte) ':';
/**
* Lower case offset.
*/
private static final int LC_OFFSET = 'A' - 'a';
/**
* Internal buffer.
*/
protected byte buf[];
/**
* Last valid byte.
*/
protected int count;
/**
* Position in the buffer.
*/
protected int pos;
/**
* Underlying input stream.
*/
protected InputStream is;
/**
 * Read a header, and copies it to the given buffer. This
 * function is meant to be used during the HTTP request header parsing.
 * Do NOT attempt to read the request body using it.
 * <p>
 * The work is done in three steps:
 * <ol>
 * <li>If the next byte is CR or LF the line is treated as blank:
 *     {@code header.nameEnd} and {@code header.valueEnd} are set to 0 and
 *     the method returns.</li>
 * <li>Bytes up to and including the first ':' are copied into
 *     {@code header.name}, with 'A'-'Z' folded to lower case;
 *     {@code header.nameEnd} excludes the colon.</li>
 * <li>The value is copied into {@code header.value}. Leading spaces/tabs
 *     of each line are skipped, CR bytes are dropped, and a following line
 *     that starts with SP or HT is appended as a continuation, separated
 *     by a single ' '.</li>
 * </ol>
 * Both char buffers are doubled on demand while the doubled size stays
 * within {@code HttpHeader.MAX_NAME_SIZE} / {@code HttpHeader.MAX_VALUE_SIZE}.
 *
 * @param header Request header
 * @throws IOException If an exception occurs during the underlying socket
 * read operations, or if the given buffer is not big enough to accommodate
 * the whole line.
 */
public void readHeader(HttpHeader header)
throws IOException {
// Recycling check: a non-zero nameEnd means the header object was used before
if (header.nameEnd != 0)
header.recycle();
// Checking for a blank line
// If the first byte is a line terminator, report an empty header and return
int chr = read();
if ((chr == CR) || (chr == LF)) { // Skipping CR
if (chr == CR)
read(); // Skipping LF
header.nameEnd = 0;
header.valueEnd = 0;
return;
} else {
// Not a blank line: step back so the byte just read is parsed as part of the name
// NOTE(review): this also runs when read() returned -1; whether pos was
// advanced in that case depends on read(), which is not visible here - confirm
pos--;
}
// Reading the header name
int maxRead = header.name.length;
int readCount = 0;
boolean colon = false;
while (!colon) {
// if the buffer is full, extend it
// The name buffer is doubled as long as the doubled size does not exceed MAX_NAME_SIZE
if (readCount >= maxRead) {
if ((2 * maxRead) <= HttpHeader.MAX_NAME_SIZE) {
char[] newBuffer = new char[2 * maxRead];
System.arraycopy(header.name, 0, newBuffer, 0, maxRead);
header.name = newBuffer;
maxRead = header.name.length;
} else {
throw new IOException
(sm.getString("requestStream.readline.toolong"));
}
}
// We're at the end of the internal buffer
// read() is called to fetch more data (presumably it refills buf and updates
// count - confirm against read()); pos is then reset to 0 so the byte it
// returned is processed below through buf[pos]
if (pos >= count) {
int val = read();
if (val == -1) {
throw new IOException
(sm.getString("requestStream.readline.error"));
}
pos = 0;
}
// A ':' separates the name from the value and ends this loop
if (buf[pos] == COLON) {
colon = true;
}
// NOTE(review): this cast sign-extends bytes >= 0x80, whereas the value
// path below masks with 0xff - confirm whether the difference is intended
char val = (char) buf[pos];
if ((val >= 'A') && (val <= 'Z')) {
val = (char) (val - LC_OFFSET);// LC_OFFSET is 'A' - 'a', so this maps upper case to lower case
}
header.name[readCount] = val;// store the char (the ':' itself is stored too)
readCount++;
pos++;
}
// The last char stored is the ':', so it is excluded from the name length
header.nameEnd = readCount - 1;
// Reading the header value (which can be spanned over multiple lines)
maxRead = header.value.length;
readCount = 0;
boolean eol = false;
boolean validLine = true;
while (validLine) {
boolean space = true;
// Skipping spaces
// Note : Only leading white spaces are removed. Trailing white
// spaces are not.
while (space) {
// We're at the end of the internal buffer
// Fetch more data via read() and restart at position 0
if (pos >= count) {
int val = read();
if (val == -1)
throw new IOException
(sm.getString("requestStream.readline.error"));
pos = 0;
}
if ((buf[pos] == SP) || (buf[pos] == HT)) {// skip ' ' and '\t'
pos++;
} else {
space = false;// first non-blank byte: leave the skipping loop
}
}
while (!eol) {
// if the buffer is full, extend it
// The value buffer is doubled as long as the doubled size does not exceed MAX_VALUE_SIZE
if (readCount >= maxRead) {
if ((2 * maxRead) <= HttpHeader.MAX_VALUE_SIZE) {
char[] newBuffer = new char[2 * maxRead];
System.arraycopy(header.value, 0, newBuffer, 0,
maxRead);
header.value = newBuffer;
maxRead = header.value.length;
} else {
throw new IOException
(sm.getString("requestStream.readline.toolong"));
}
}
// We're at the end of the internal buffer
if (pos >= count) {
// Fetch more data via read() and restart at position 0
int val = read();
if (val == -1)
throw new IOException
(sm.getString("requestStream.readline.error"));
pos = 0;
}
// CR is dropped, LF ends the current line, anything else is copied
if (buf[pos] == CR) {
} else if (buf[pos] == LF) {
eol = true;
} else {
// FIXME : Check if binary conversion is working fine
// The byte is widened as an unsigned value (0..255) before the char cast
int ch = buf[pos] & 0xff;
header.value[readCount] = (char) ch;
readCount++;
}
pos++;
}
// Look at the first byte of the next line to detect a continuation line
int nextChr = read();
// Anything other than ' ' or '\t' starts a new header, so the value is complete;
// the byte is pushed back for the next call
// NOTE(review): pos-- also runs if read() returned -1 - confirm this is safe
if ((nextChr != SP) && (nextChr != HT)) {
pos--;
validLine = false;
} else {// continuation line: reset eol and go round the outer loop again
eol = false;
// if the buffer is full, extend it
if (readCount >= maxRead) {
if ((2 * maxRead) <= HttpHeader.MAX_VALUE_SIZE) {
char[] newBuffer = new char[2 * maxRead];
System.arraycopy(header.value, 0, newBuffer, 0,
maxRead);
header.value = newBuffer;
maxRead = header.value.length;
} else {
throw new IOException
(sm.getString("requestStream.readline.toolong"));
}
}
// The line break plus leading whitespace is replaced by one space
header.value[readCount] = ' ';
readCount++;
}
}
header.valueEnd = readCount;
}
好了,一天一点,先到这里,明天再来看看如何解析cookies这一部分!抽出时间来看看深入JVM的学习。
首先,在讲前面提到的细节函数之前,先看看HttpHeader这个类,它是一个用来存放头部信息的类。
//自动增长HttpHeader数组大小,这个类很简单,先看看它的类成员以及它的构造函数:分别用了char[] name和char[] value来存放头部信息
package org.apache.catalina.connector.http;
/**
* HTTP header enum type.
*
* @author Remy Maucherat
* @version $Revision: 466595 $ $Date: 2006-10-21 23:24:41 +0100 (Sat, 21 Oct 2006) $
* @deprecated
*/
final class HttpHeader {
// -------------------------------------------------------------- Constants
public static final int INITIAL_NAME_SIZE = 32;
public static final int INITIAL_VALUE_SIZE = 64;
public static final int MAX_NAME_SIZE = 128;
public static final int MAX_VALUE_SIZE = 4096;
// ----------------------------------------------------------- Constructors
/**
 * Creates an empty header: a name buffer of {@link #INITIAL_NAME_SIZE}
 * chars and a value buffer of {@link #INITIAL_VALUE_SIZE} chars, with both
 * end marks set to 0.
 */
public HttpHeader() {
    this(new char[INITIAL_NAME_SIZE], 0,
         new char[INITIAL_VALUE_SIZE], 0);
}
/**
 * Creates a header backed by the supplied buffers. The arrays are stored
 * by reference (no copy is made), so later changes to them are visible
 * through this header.
 *
 * @param name     buffer holding the header name
 * @param nameEnd  end mark of the name within {@code name}
 * @param value    buffer holding the header value
 * @param valueEnd end mark of the value within {@code value}
 */
public HttpHeader(char[] name, int nameEnd, char[] value, int valueEnd) {
    // Buffers first, then their end marks.
    this.name = name;
    this.value = value;
    this.nameEnd = nameEnd;
    this.valueEnd = valueEnd;
}
/**
 * Creates a header from strings. The name is stored lower-cased; the value
 * is stored unchanged.
 *
 * @param name  header name (any case)
 * @param value header value
 */
public HttpHeader(String name, String value) {
    // Fold case with a fixed locale: the no-arg toLowerCase() uses the JVM
    // default locale, which e.g. in Turkish maps 'I' to dotless 'ı'.
    this.name = name.toLowerCase(java.util.Locale.ENGLISH).toCharArray();
    // Lower-casing can change the number of chars (e.g. U+0130), so take the
    // end mark from the array actually stored, not from the input string.
    this.nameEnd = this.name.length;
    this.value = value.toCharArray();
    this.valueEnd = this.value.length;
}
// ----------------------------------------------------- Instance Variables
public char[] name;
public int nameEnd;
public char[] value;
public int valueEnd;
protected int hashCode = 0;
第一个细节:
//接着来看看parseHeaders函数中的allocateHeader函数,这个函数其实是为parseHeaders函数分配一个HttpHeader对象。通过设定一个默认的HttpHeader池大小,
//如果池中的HttpHeader对象不够,那么自动增大HttpHeader池大小
package org.apache.catalina.connector.http.HttpRequestImpl;
/**
* Headers pool.
*/
protected HttpHeader[] headerPool = new HttpHeader[INITIAL_POOL_SIZE];
/**
 * Hands out the pool slot at index {@code nextHeader}.
 * <p>
 * When {@code nextHeader} has reached the end of the pool, the pool is
 * first enlarged by {@code POOL_SIZE_INCREMENT} entries and the existing
 * entries are carried over. A slot that has never been used is filled with
 * a fresh {@link HttpHeader} before being returned. This method does not
 * change {@code nextHeader}.
 *
 * @return the HttpHeader stored at the current pool position
 */
HttpHeader allocateHeader() {
    if (nextHeader == headerPool.length) {
        // Pool exhausted: move the existing entries into a larger array.
        HttpHeader[] grownPool =
            new HttpHeader[headerPool.length + POOL_SIZE_INCREMENT];
        System.arraycopy(headerPool, 0, grownPool, 0, nextHeader);
        headerPool = grownPool;
    }
    HttpHeader slot = headerPool[nextHeader];
    if (slot == null) {
        // First use of this position: create the header lazily.
        slot = new HttpHeader();
        headerPool[nextHeader] = slot;
    }
    return slot;
}
第二个细节:大体做了三件事,
1,过滤掉换行符;
2,进行解析头部的key部分,这一部分的解析跟之前的差不多;
3,进行解析头部的value部分,这一部分需要注意的一点是:value有可能是跨行的,也就是以多行的形式出现
package org.apache.catalina.connector.http.SocketInputStream;
// -------------------------------------------------------------- Constants
/**
* CR.
*/
private static final byte CR = (byte) '\r';
/**
* LF.
*/
private static final byte LF = (byte) '\n';
/**
* SP.
*/
private static final byte SP = (byte) ' ';
/**
* HT.
*/
private static final byte HT = (byte) '\t';
/**
* COLON.
*/
private static final byte COLON = (byte) ':';
/**
* Lower case offset.
*/
private static final int LC_OFFSET = 'A' - 'a';
/**
* Internal buffer.
*/
protected byte buf[];
/**
* Last valid byte.
*/
protected int count;
/**
* Position in the buffer.
*/
protected int pos;
/**
* Underlying input stream.
*/
protected InputStream is;
/**
 * Read a header, and copies it to the given buffer. This
 * function is meant to be used during the HTTP request header parsing.
 * Do NOT attempt to read the request body using it.
 * <p>
 * The work is done in three steps:
 * <ol>
 * <li>If the next byte is CR or LF the line is treated as blank:
 *     {@code header.nameEnd} and {@code header.valueEnd} are set to 0 and
 *     the method returns.</li>
 * <li>Bytes up to and including the first ':' are copied into
 *     {@code header.name}, with 'A'-'Z' folded to lower case;
 *     {@code header.nameEnd} excludes the colon.</li>
 * <li>The value is copied into {@code header.value}. Leading spaces/tabs
 *     of each line are skipped, CR bytes are dropped, and a following line
 *     that starts with SP or HT is appended as a continuation, separated
 *     by a single ' '.</li>
 * </ol>
 * Both char buffers are doubled on demand while the doubled size stays
 * within {@code HttpHeader.MAX_NAME_SIZE} / {@code HttpHeader.MAX_VALUE_SIZE}.
 *
 * @param header Request header
 * @throws IOException If an exception occurs during the underlying socket
 * read operations, or if the given buffer is not big enough to accommodate
 * the whole line.
 */
public void readHeader(HttpHeader header)
throws IOException {
// Recycling check: a non-zero nameEnd means the header object was used before
if (header.nameEnd != 0)
header.recycle();
// Checking for a blank line
// If the first byte is a line terminator, report an empty header and return
int chr = read();
if ((chr == CR) || (chr == LF)) { // Skipping CR
if (chr == CR)
read(); // Skipping LF
header.nameEnd = 0;
header.valueEnd = 0;
return;
} else {
// Not a blank line: step back so the byte just read is parsed as part of the name
// NOTE(review): this also runs when read() returned -1; whether pos was
// advanced in that case depends on read(), which is not visible here - confirm
pos--;
}
// Reading the header name
int maxRead = header.name.length;
int readCount = 0;
boolean colon = false;
while (!colon) {
// if the buffer is full, extend it
// The name buffer is doubled as long as the doubled size does not exceed MAX_NAME_SIZE
if (readCount >= maxRead) {
if ((2 * maxRead) <= HttpHeader.MAX_NAME_SIZE) {
char[] newBuffer = new char[2 * maxRead];
System.arraycopy(header.name, 0, newBuffer, 0, maxRead);
header.name = newBuffer;
maxRead = header.name.length;
} else {
throw new IOException
(sm.getString("requestStream.readline.toolong"));
}
}
// We're at the end of the internal buffer
// read() is called to fetch more data (presumably it refills buf and updates
// count - confirm against read()); pos is then reset to 0 so the byte it
// returned is processed below through buf[pos]
if (pos >= count) {
int val = read();
if (val == -1) {
throw new IOException
(sm.getString("requestStream.readline.error"));
}
pos = 0;
}
// A ':' separates the name from the value and ends this loop
if (buf[pos] == COLON) {
colon = true;
}
// NOTE(review): this cast sign-extends bytes >= 0x80, whereas the value
// path below masks with 0xff - confirm whether the difference is intended
char val = (char) buf[pos];
if ((val >= 'A') && (val <= 'Z')) {
val = (char) (val - LC_OFFSET);// LC_OFFSET is 'A' - 'a', so this maps upper case to lower case
}
header.name[readCount] = val;// store the char (the ':' itself is stored too)
readCount++;
pos++;
}
// The last char stored is the ':', so it is excluded from the name length
header.nameEnd = readCount - 1;
// Reading the header value (which can be spanned over multiple lines)
maxRead = header.value.length;
readCount = 0;
boolean eol = false;
boolean validLine = true;
while (validLine) {
boolean space = true;
// Skipping spaces
// Note : Only leading white spaces are removed. Trailing white
// spaces are not.
while (space) {
// We're at the end of the internal buffer
// Fetch more data via read() and restart at position 0
if (pos >= count) {
int val = read();
if (val == -1)
throw new IOException
(sm.getString("requestStream.readline.error"));
pos = 0;
}
if ((buf[pos] == SP) || (buf[pos] == HT)) {// skip ' ' and '\t'
pos++;
} else {
space = false;// first non-blank byte: leave the skipping loop
}
}
while (!eol) {
// if the buffer is full, extend it
// The value buffer is doubled as long as the doubled size does not exceed MAX_VALUE_SIZE
if (readCount >= maxRead) {
if ((2 * maxRead) <= HttpHeader.MAX_VALUE_SIZE) {
char[] newBuffer = new char[2 * maxRead];
System.arraycopy(header.value, 0, newBuffer, 0,
maxRead);
header.value = newBuffer;
maxRead = header.value.length;
} else {
throw new IOException
(sm.getString("requestStream.readline.toolong"));
}
}
// We're at the end of the internal buffer
if (pos >= count) {
// Fetch more data via read() and restart at position 0
int val = read();
if (val == -1)
throw new IOException
(sm.getString("requestStream.readline.error"));
pos = 0;
}
// CR is dropped, LF ends the current line, anything else is copied
if (buf[pos] == CR) {
} else if (buf[pos] == LF) {
eol = true;
} else {
// FIXME : Check if binary conversion is working fine
// The byte is widened as an unsigned value (0..255) before the char cast
int ch = buf[pos] & 0xff;
header.value[readCount] = (char) ch;
readCount++;
}
pos++;
}
// Look at the first byte of the next line to detect a continuation line
int nextChr = read();
// Anything other than ' ' or '\t' starts a new header, so the value is complete;
// the byte is pushed back for the next call
// NOTE(review): pos-- also runs if read() returned -1 - confirm this is safe
if ((nextChr != SP) && (nextChr != HT)) {
pos--;
validLine = false;
} else {// continuation line: reset eol and go round the outer loop again
eol = false;
// if the buffer is full, extend it
if (readCount >= maxRead) {
if ((2 * maxRead) <= HttpHeader.MAX_VALUE_SIZE) {
char[] newBuffer = new char[2 * maxRead];
System.arraycopy(header.value, 0, newBuffer, 0,
maxRead);
header.value = newBuffer;
maxRead = header.value.length;
} else {
throw new IOException
(sm.getString("requestStream.readline.toolong"));
}
}
// The line break plus leading whitespace is replaced by one space
header.value[readCount] = ' ';
readCount++;
}
}
header.valueEnd = readCount;
}
好了,一天一点,先到这里,明天再来看看如何解析cookies这一部分!抽出时间来看看深入JVM的学习。