目录
1. 本节代码
2. 状态机(有限状态机)
2.1 概念知识
- 每一个状态都是一个机器
- 在每个机器里,我们可以做计算、存储、输出…
- 所有的这些机器接受的输入是一致的
- 状态机的每一个机器本身没有状态,如果我们用函数来表示的话,它应该是纯函数(无副作用,返回值只由输入参数决定,不依赖函数外部的状态)
- 每一个机器知道下一个状态
- 每个机器都有确定的下一个状态(Moore)
- 每个机器根据 输入决定下一个状态(Mealy)
// Each function represents one state of the state machine (pseudo-code template).
function state(input){ // the function parameter is the input
// TODO: handle this state's logic here; any code may be written inside the function
return next; // the return value is used as the next state (`next` is a placeholder)
}
/*********** invocation ************/
while(input) {
// fetch the next input here
state = state(input); // the value returned by the state function becomes the next state
}
2.2 使用状态机处理字符
1). 使用状态机查询字符串中是否包含"abc"
/**
 * Runs the state machine over `src`, one character at a time, beginning in
 * the `start` state.
 *
 * @param {string} src - Text to scan.
 * @returns {boolean} true when the machine is in the `end` state after the
 *   last character has been consumed.
 */
function match(src) {
  const finalState = [...src].reduce((current, ch) => current(ch), start);
  // The search succeeded only if the machine stopped in the end state.
  return finalState === end;
}
/**
 * Initial state: nothing of the pattern has been matched yet.
 *
 * @param {string} char - Current input character.
 * @returns {Function} `foundA` when `char` is 'a', otherwise `start` itself.
 */
function start(char) {
  return char === 'a' ? foundA : start;
}
/**
 * State reached after an 'a' has been seen.
 *
 * @param {string} char - Current input character.
 * @returns {Function} `foundB` on 'b'; otherwise whatever `start` returns
 *   for the same character.
 */
function foundA(char) {
  if (char !== 'b') {
    // Hand the character to start (a call, not just a return of the state)
    // so that the current character is examined again from the beginning.
    return start(char);
  }
  return foundB;
}
/**
 * State reached after "ab" has been seen.
 *
 * @param {string} char - Current input character.
 * @returns {Function} `end` on 'c'; otherwise the result of re-feeding the
 *   character to `start`.
 */
function foundB(char) {
  return char === 'c' ? end : start(char);
}
/**
 * Accepting state: it returns itself for every input, so the machine stays
 * here once it has been reached.
 *
 * @param {string} c - Current input character (ignored).
 * @returns {Function} `end` itself.
 */
function end(c) {
  return end;
}
2). 使用状态机查询字符串中是否包含"abcabx"
/**
 * Feeds every character of `src` to the state machine, beginning in `start`.
 *
 * @param {string} src - Text to scan.
 * @returns {boolean} true when the machine finishes in the `end` state.
 */
function match(src) {
  let current = start;
  [...src].forEach((ch) => {
    current = current(ch);
  });
  return current === end;
}
/**
 * Initial state: no part of the pattern has been matched.
 *
 * @param {string} char - Current input character.
 * @returns {Function} `foundA` on 'a', otherwise `start`.
 */
function start(char) {
  if (char !== 'a') {
    return start;
  }
  return foundA;
}
/**
 * State after "a".
 *
 * @param {string} char - Current input character.
 * @returns {Function} `foundB` on 'b'; otherwise the result of re-feeding
 *   the character to `start`.
 */
function foundA(char) {
  return char === 'b' ? foundB : start(char);
}
/**
 * State after "ab".
 *
 * @param {string} char - Current input character.
 * @returns {Function} `foundC` on 'c'; otherwise the result of re-feeding
 *   the character to `start`.
 */
function foundB(char) {
  if (char !== 'c') {
    return start(char);
  }
  return foundC;
}
/**
 * State after "abc".
 *
 * @param {string} char - Current input character.
 * @returns {Function} `foundA2` on 'a'; otherwise the result of re-feeding
 *   the character to `start`.
 */
function foundC(char) {
  return char === 'a' ? foundA2 : start(char);
}
/**
 * State after "abca".
 *
 * @param {string} char - Current input character.
 * @returns {Function} `foundB2` on 'b'; otherwise the result of re-feeding
 *   the character to `start`.
 */
function foundA2(char) {
  if (char !== 'b') {
    return start(char);
  }
  return foundB2;
}
/**
 * State after "abcab".
 *
 * @param {string} char - Current input character.
 * @returns {Function} `end` on 'x'; otherwise whatever `foundB` returns for
 *   the same character.
 */
function foundB2(char) {
  if (char === 'x') {
    return end;
  }
  // Note the fallback is foundB, not start: the "ab" just consumed is also
  // the beginning of the pattern, so the character is judged as if only
  // "ab" had been matched.
  return foundB(char);
}
/**
 * Accepting state; returns itself for any input so the machine stays here.
 *
 * @param {string} c - Current input character (ignored).
 * @returns {Function} `end` itself.
 */
function end(c) {
  return end;
}
3). 使用状态机查询字符串中是否包含"abababx"
/**
 * Drives the state machine across `src`, beginning in the `start` state.
 *
 * @param {string} src - Text to scan.
 * @returns {boolean} true when the state after the final character is `end`.
 */
function match(src) {
  const step = (current, ch) => current(ch);
  return Array.from(src).reduce(step, start) === end;
}
/**
 * Initial state: no part of the pattern has been matched.
 *
 * @param {string} char - Current input character.
 * @returns {Function} `foundA` on 'a', otherwise `start`.
 */
function start(char) {
  const sawA = char === 'a';
  return sawA ? foundA : start;
}
/**
 * State after "a".
 *
 * @param {string} char - Current input character.
 * @returns {Function} `foundB` on 'b'; otherwise the result of re-feeding
 *   the character to `start`.
 */
function foundA(char) {
  if (char !== 'b') {
    return start(char);
  }
  return foundB;
}
/**
 * State after "ab".
 *
 * @param {string} char - Current input character.
 * @returns {Function} `foundA2` on 'a'; otherwise the result of re-feeding
 *   the character to `start`.
 */
function foundB(char) {
  return char === 'a' ? foundA2 : start(char);
}
/**
 * State after "aba".
 *
 * @param {string} char - Current input character.
 * @returns {Function} `foundB2` on 'b'; otherwise the result of re-feeding
 *   the character to `start`.
 */
function foundA2(char) {
  if (char !== 'b') {
    return start(char);
  }
  return foundB2;
}
/**
 * State after "abab".
 *
 * @param {string} char - Current input character.
 * @returns {Function} `foundA3` on 'a'; otherwise whatever `foundB` returns
 *   for the same character (the trailing "ab" is kept as a partial match).
 */
function foundB2(char) {
  return char === 'a' ? foundA3 : foundB(char);
}
/**
 * State after "ababa".
 *
 * @param {string} char - Current input character.
 * @returns {Function} `foundB3` on 'b'; otherwise the result of re-feeding
 *   the character to `start`.
 */
function foundA3(char) {
  if (char !== 'b') {
    return start(char);
  }
  return foundB3;
}
/**
 * State after "ababab".
 *
 * @param {string} char - Current input character.
 * @returns {Function} `end` on 'x'; otherwise whatever `foundB2` returns for
 *   the same character (the trailing "abab" is kept as a partial match).
 */
function foundB3(char) {
  if (char === 'x') {
    return end;
  }
  return foundB2(char);
}
/**
 * Accepting state; returns itself for any input so the machine stays here.
 *
 * @param {string} c - Current input character (ignored).
 * @returns {Function} `end` itself.
 */
function end(c) {
  return end;
}
2.3 字符串 KMP 算法
1). 参考链接
3. 简易浏览器实现
3.1 HTTP协议解析
- ISO-OSI七层网络模型
- Request结构
- Response结构
- 创建server
const http = require('http');
/**
 * Request handler for the demo server: collects the request body, logs it,
 * then answers with a fixed 200 text/html response.
 *
 * @param {http.IncomingMessage} request
 * @param {http.ServerResponse} response
 */
const handleRequest = (request, response) => {
  const chunks = [];

  request.on('error', (err) => {
    console.error(err);
  });

  request.on('data', (chunk) => {
    chunks.push(chunk);
  });

  request.on('end', () => {
    // The reply is only written once the whole request body has arrived.
    const text = Buffer.concat(chunks).toString();
    console.log("body:", text);
    response.writeHead(200, {
      'Content-type': 'text/html'
    });
    response.end('Hello World\n');
  });
};

const server = http.createServer(handleRequest);
server.listen(8088);
console.log('server started!');
- 创建client
//ChunkedBodyParser.js
/**
 * Character-driven state machine that decodes an HTTP body sent with
 * `Transfer-Encoding: chunked`.
 *
 * Feed the decoded body one character at a time through `parser(char)`.
 * Decoded characters accumulate in `content`; `isFinished` becomes true when
 * the zero-length terminating chunk has been seen.
 */
class ChunkedBodyParser {
  // While a size line is read this accumulates the hex value; while a chunk
  // is read it holds the number of BYTES still expected in that chunk.
  length = 0;
  // Decoded body characters, in arrival order.
  content = [];
  // Set once the terminating zero-length chunk is reached.
  isFinished = false;
  // Current state function.
  state = this.waitingLength;

  /**
   * Consumes one character and advances the state machine.
   *
   * @param {string} char - Next character of the chunked body.
   */
  parser(char) {
    this.state = this.state(char);
  }

  /**
   * Number of UTF-8 bytes `char` stands for on the wire.
   *
   * Chunk sizes are byte counts, but the parser is fed decoded characters.
   * When the input arrives as single UTF-16 units, a surrogate pair's four
   * bytes are charged to the low surrogate so that both halves are stored
   * before the chunk is considered complete.
   *
   * @param {string} char
   * @returns {number}
   */
  static utf8Size(char) {
    if (char.length === 1) {
      const unit = char.charCodeAt(0);
      if (unit >= 0xd800 && unit <= 0xdbff) {
        return 0;
      }
      if (unit >= 0xdc00 && unit <= 0xdfff) {
        return 4;
      }
    }
    return Buffer.byteLength(char, 'utf8');
  }

  /** Reads the hexadecimal chunk size up to the terminating '\r'. */
  waitingLength(char) {
    if (char === '\r') {
      if (this.length === 0) {
        // A zero-size chunk marks the end of the body.
        this.isFinished = true;
        return this.end;
      }
      return this.waitingLengthEnd;
    }
    const digit = Number.parseInt(char, 16);
    // Characters that are not hex digits are skipped; folding NaN into the
    // size would make the chunk impossible to complete.
    if (!Number.isNaN(digit)) {
      this.length = this.length * 16 + digit;
    }
    return this.waitingLength;
  }

  /** Skips to the '\n' that ends the size line. */
  waitingLengthEnd(char) {
    return char === '\n' ? this.readingChunk : this.waitingLengthEnd;
  }

  /** Stores chunk data until the announced number of bytes is consumed. */
  readingChunk(char) {
    this.content.push(char);
    this.length -= ChunkedBodyParser.utf8Size(char);
    if (this.length <= 0) {
      // Reset so the next size line starts accumulating from zero.
      this.length = 0;
      return this.waitingNewLine;
    }
    return this.readingChunk;
  }

  /** Waits for the '\r' that follows the chunk data. */
  waitingNewLine(char) {
    return char === '\r' ? this.waitingNewLineEnd : this.waitingNewLine;
  }

  /** Waits for the '\n' that follows the chunk data, then expects a new size line. */
  waitingNewLineEnd(char) {
    return char === '\n' ? this.waitingLength : this.waitingNewLineEnd;
  }

  /** Terminal state: ignores everything after the last chunk. */
  end(char) {
    return this.end;
  }
}
module.exports = ChunkedBodyParser;
//ResponseParser.js
const ChunkedBodyParser = require('./ChunkedBodyParser');
/**
 * Incremental HTTP/1.1 response parser implemented as a character-driven
 * state machine: status line, then headers, then body.
 *
 * Text is supplied through `receive(str)`, which may be called several times
 * as data arrives. Only bodies announced with `Transfer-Encoding: chunked`
 * are decoded (through ChunkedBodyParser); for any other response
 * `bodyParser` stays null and `isFinished` never becomes true.
 */
class ResponseParser {
  statusLine = '';
  headers = {};
  headerName = '';
  headerValue = '';
  bodyParser = null;
  // Current state function. It lives on the instance, not inside receive(),
  // so that a response split across several receive() calls continues where
  // the previous call stopped.
  state = this.buildStatusLine;

  /** @returns {boolean} true once the body parser reports the body complete. */
  get isFinished() {
    return this.bodyParser !== null && this.bodyParser.isFinished;
  }

  /**
   * Snapshot of what has been parsed so far.
   *
   * @returns {{statusCode: string, statusText: string, headers: Object, body: string}}
   *   `statusCode`/`statusText` are empty strings when the status line does
   *   not look like "HTTP/1.1 <code> <text>"; `body` is empty when no body
   *   parser was created.
   */
  get response() {
    const matched = this.statusLine.match(/HTTP\/1\.1 ([0-9]+) ([\s\S]+)/);
    return {
      statusCode: matched ? matched[1] : '',
      statusText: matched ? matched[2] : '',
      headers: this.headers,
      body: this.bodyParser ? this.bodyParser.content.join('') : ''
    };
  }

  /**
   * Feeds a piece of response text through the state machine.
   *
   * @param {string} str - Next piece of the response.
   */
  receive(str) {
    for (let position = 0; position < str.length; position++) {
      this.state = this.state(str.charAt(position));
    }
  }

  /** Collects the status line up to '\r'. */
  buildStatusLine(char) {
    if (char === '\r') {
      return this.waitStatusLineEnd;
    }
    this.statusLine += char;
    return this.buildStatusLine;
  }

  /** Waits for the '\n' ending the status line. */
  waitStatusLineEnd(char) {
    return char === '\n' ? this.buildHeaderName : this.waitStatusLineEnd;
  }

  /** Collects a header name; a '\r' at this point starts the blank line ending the headers. */
  buildHeaderName(char) {
    if (char === ':') {
      return this.waitHeaderSpace;
    }
    if (char === '\r') {
      return this.headerBuildEnd;
    }
    this.headerName += char;
    return this.buildHeaderName;
  }

  /** Skips the single space after ':'; any other character already belongs to the value. */
  waitHeaderSpace(char) {
    if (char === ' ') {
      return this.buildHeaderValue;
    }
    return this.buildHeaderValue(char);
  }

  /** Collects a header value up to '\r', then stores the finished header. */
  buildHeaderValue(char) {
    if (char === '\r') {
      this.headers[this.headerName] = this.headerValue;
      this.headerName = '';
      this.headerValue = '';
      return this.waitHeaderLineEnd;
    }
    this.headerValue += char;
    return this.buildHeaderValue;
  }

  /** Waits for the '\n' ending a header line. */
  waitHeaderLineEnd(char) {
    return char === '\n' ? this.buildHeaderName : this.waitHeaderLineEnd;
  }

  /** Waits for the '\n' of the blank line, then selects the body parser. */
  headerBuildEnd(char) {
    if (char !== '\n') {
      return this.headerBuildEnd;
    }
    if (this.headers['Transfer-Encoding'] === 'chunked') {
      this.bodyParser = new ChunkedBodyParser();
    }
    return this.buildBody;
  }

  /** Forwards body characters to the body parser, if one was created. */
  buildBody(char) {
    if (this.bodyParser) {
      this.bodyParser.parser(char);
    }
    return this.buildBody;
  }
}
module.exports = ResponseParser;
//client.js
const net = require('net');
const ResponseParser = require('./ResponseParser');
const HtmlParser = require('./HtmlParser');
/**
 * Minimal HTTP/1.1 request that can serialize itself and send itself over a
 * TCP connection.
 */
class Request {
  /**
   * @param {Object} options
   * @param {string} [options.method='GET']
   * @param {string} options.host
   * @param {number|string} [options.port=80]
   * @param {string} [options.path='/']
   * @param {Object|string} [options.body={}] - Serialized according to the
   *   Content-Type header: JSON for application/json, key=value pairs for
   *   application/x-www-form-urlencoded. For any other content type a string
   *   body is sent as-is and a non-string body is sent as an empty body.
   * @param {Object} [options.headers={}] - Copied, so the caller's object is
   *   not modified. Content-Type defaults to
   *   application/x-www-form-urlencoded; Content-length is always set.
   */
  constructor(options) {
    this.method = options.method || 'GET';
    this.host = options.host;
    this.port = options.port || 80;
    this.path = options.path || '/';
    this.body = options.body || {};
    this.headers = { ...options.headers };
    if (!this.headers['Content-Type']) {
      this.headers['Content-Type'] = 'application/x-www-form-urlencoded';
    }

    const contentType = this.headers['Content-Type'];
    if (contentType === 'application/json') {
      this.bodyText = JSON.stringify(this.body);
    } else if (contentType === 'application/x-www-form-urlencoded') {
      this.bodyText = Object.keys(this.body)
        .map((key) => `${encodeURIComponent(key)}=${encodeURIComponent(this.body[key])}`)
        .join('&');
    } else {
      this.bodyText = typeof this.body === 'string' ? this.body : '';
    }
    // The header must state the size in bytes, which differs from the string
    // length as soon as the body contains non-ASCII characters.
    this.headers['Content-length'] = Buffer.byteLength(this.bodyText);
  }

  /**
   * @returns {string} The request as HTTP/1.1 text: request line, header
   *   lines, an empty line, then the body, separated by CRLF.
   */
  toString() {
    const headerLines = Object.keys(this.headers).map((key) => `${key}: ${this.headers[key]}`);
    return [`${this.method} ${this.path} HTTP/1.1`, ...headerLines, '', this.bodyText].join('\r\n');
  }

  /**
   * Writes the request to a connection and parses the reply.
   *
   * @param {net.Socket} [connection] - Existing connection to write to; when
   *   omitted a new one is opened to `host`:`port`.
   * @returns {Promise<Object>} Resolves with the parser's `response` once
   *   the parser reports it is finished. Rejects on a connection error, on a
   *   parse error, or when the connection closes before the response is
   *   complete.
   */
  send(connection) {
    return new Promise((resolve, reject) => {
      const parser = new ResponseParser();
      let socket = connection;
      if (socket) {
        socket.write(this.toString());
      } else {
        socket = net.createConnection({
          host: this.host,
          port: this.port
        }, () => {
          socket.write(this.toString());
        });
      }

      socket.on('data', (data) => {
        const text = data.toString();
        console.log(text);
        try {
          parser.receive(text);
        } catch (err) {
          reject(err);
          socket.end();
          return;
        }
        if (parser.isFinished) {
          resolve(parser.response);
          socket.end();
        }
      });
      socket.on('error', (err) => {
        reject(err);
        socket.end();
      });
      // Has no effect once the promise has settled; otherwise it keeps the
      // caller from waiting forever on an incomplete response.
      socket.on('close', () => {
        reject(new Error('Connection closed before the response was complete'));
      });
    });
  }
}
// Entry point: POST a small form to the local demo server and hand the
// response body to the HTML parser.
void async function () {
  const request = new Request({
    method: 'POST',
    host: '127.0.0.1',
    port: '8088',
    path: '/',
    headers: {
      ['X-Foo2']: 'custom',
    },
    body: {
      name: 'world'
    }
  });
  try {
    const response = await request.send();
    HtmlParser.parserHTML(response.body);
  } catch (err) {
    // Without this the failure would surface only as an unhandled rejection.
    console.error(err);
  }
}();
3.2 HTML解析
- 使用有限状态机(FSM)实现
- html的标准中,已经规定了html的状态
- 简易浏览器只实现部分html状态
- 解析标签
标签分类:开始标签、结束标签、自封闭标签
- 创建元素
- 处理属性
- 用token构建DOM树
- 使用栈
- 遇到开始标签时创建元素并入栈,遇到结束标签时出栈
- 自封闭节点认为入栈后立即出栈
- 任何元素的父元素是它入栈前的栈顶元素
- 将文本节点加到DOM树
- 文本节点类似于自封闭标签,不会入栈
- 要注意多个文本节点的合并
//HtmlParser.js
// Tag token currently being built by the tokenizer state functions.
let currentToken = null;
// Attribute ({name, value}) currently being built.
let currentAttribute = null;
// Text node that consecutive text tokens are appended to; emit() resets it to
// null whenever a start or end tag is emitted.
let currentTextNode = null;
// Document root; emit() attaches parsed nodes beneath it.
let root = {type: 'document', children: []};
// Stack of open elements; its last entry is the parent of the next node.
let stack = [root];
// Unique end-of-input marker passed to the state function after the last character.
const EOF = Symbol('EOF');
/**
 * Applies one tokenizer token to the DOM tree under construction.
 *
 * - 'startTag': creates an element, appends it to the element on top of the
 *   stack, and pushes it unless the token is self-closing.
 * - 'endTag': pops the stack; the tag name must equal that of the top element.
 * - 'text': appends the content to the current text node, creating the node
 *   first if the previous token was not text (adjacent text is merged).
 * - any other type (e.g. 'EOF') is ignored.
 *
 * NOTE(review): attributes arrive as extra properties on the token, so an
 * attribute named "type" or "tagName" would collide with the token's own
 * fields; that is decided where tokens are built, not here.
 *
 * @param {Object} token - Token with a `type` property.
 * @throws {Error} When an end tag does not match the currently open element.
 */
function emit(token) {
  // Token fields that describe the tag itself rather than one of its attributes.
  const reservedKeys = ['type', 'tagName', 'isSelfClosing'];
  const top = stack[stack.length - 1];

  if (token.type === 'startTag') {
    const element = {
      type: 'element',
      children: [],
      attributes: []
    };
    element.tagName = token.tagName;
    for (const key of Object.keys(token)) {
      if (!reservedKeys.includes(key)) {
        element.attributes.push({
          name: key,
          value: token[key]
        });
      }
    }
    top.children.push(element);
    element.parent = top;
    if (!token.isSelfClosing) {
      stack.push(element);
    }
    currentTextNode = null;
  } else if (token.type === 'endTag') {
    if (top.tagName !== token.tagName) {
      throw new Error(token.tagName + " Tag start end doesn't match!");
    }
    stack.pop();
    currentTextNode = null;
  } else if (token.type === 'text') {
    if (!currentTextNode) {
      currentTextNode = {
        type: 'text',
        content: ''
      };
      top.children.push(currentTextNode);
    }
    currentTextNode.content += token.content;
  }
}
/**
 * Data state: ordinary text outside of any tag.
 *
 * @param {string|symbol} char - Current character, or EOF.
 * @returns {Function|undefined} `tagOpen` on '<'; undefined after emitting
 *   the EOF token; otherwise `data`, after emitting the character as text.
 */
function data(char) {
  switch (char) {
    case '<':
      return tagOpen;
    case EOF:
      emit({
        type: 'EOF'
      });
      return undefined;
    default:
      emit({
        type: 'text',
        content: char
      });
      return data;
  }
}
/**
 * Tag-open state: the character right after '<'.
 *
 * @param {string|symbol} char - Current character, or EOF.
 * @returns {Function|undefined} `endTagOpen` on '/'; for a letter, a new
 *   startTag token is begun and the letter is handed to `tagName`; for
 *   anything else (including EOF) the '<' is emitted as text and the
 *   character is handed back to `data`, so the state machine always has a
 *   next state.
 */
function tagOpen(char) {
  if (char === '/') {
    return endTagOpen;
  }
  // The typeof guard keeps the EOF symbol away from the regular expression.
  if (typeof char === 'string' && /^[a-zA-Z]$/.test(char)) {
    currentToken = {
      type: 'startTag',
      tagName: ''
    };
    return tagName(char);
  }
  // Not the start of a tag after all: the '<' was plain text.
  emit({
    type: 'text',
    content: '<'
  });
  return data(char);
}
/**
 * End-tag-open state: the character right after "</".
 *
 * @param {string|symbol} char - Current character, or EOF.
 * @returns {Function|undefined} For a letter, a new endTag token is begun
 *   and the letter is handed to `tagName`. "</>" is dropped and parsing
 *   continues in `data`. For anything else (including EOF) the "</" is
 *   emitted as text and the character is handed back to `data`.
 */
function endTagOpen(char) {
  // The typeof guard keeps the EOF symbol away from the regular expression.
  if (typeof char === 'string' && /^[a-zA-Z]$/.test(char)) {
    currentToken = {
      type: 'endTag',
      tagName: ''
    };
    return tagName(char);
  }
  if (char === '>') {
    // End tag without a name: nothing to emit.
    return data;
  }
  emit({
    type: 'text',
    content: '</'
  });
  return data(char);
}
/**
 * Tag-name state: accumulates the name of the current start or end tag.
 *
 * @param {string|symbol} char - Current character, or EOF.
 * @returns {Function|undefined} `beforeAttributeName` on whitespace
 *   (tab, LF, FF, CR, space); `selfClosingStartTag` on '/'; `data` on '>'
 *   after emitting the token; the result of `data(EOF)` on EOF; otherwise
 *   `tagName`, after appending the lower-cased character to the name.
 */
function tagName(char) {
  if (char === EOF) {
    return data(char);
  }
  if (/^[\t\n\f\r ]$/.test(char)) {
    return beforeAttributeName;
  }
  if (char === '/') {
    return selfClosingStartTag;
  }
  if (char === '>') {
    emit(currentToken);
    return data;
  }
  // Every remaining character is part of the name, which keeps digits such
  // as the "1" in "h1".
  currentToken.tagName += char.toLowerCase();
  return tagName;
}
// ---- Attribute parsing ----

/**
 * Before-attribute-name state: between the tag name (or a finished
 * attribute) and the next attribute.
 *
 * @param {string|symbol} char - Current character, or EOF.
 * @returns {Function|undefined} Stays here on whitespace and on a stray '=';
 *   `selfClosingStartTag` on '/'; `data` on '>' after emitting the token;
 *   the result of `data(EOF)` on EOF; otherwise a new attribute is begun and
 *   the character is handed to `attributeName`.
 */
function beforeAttributeName(char) {
  if (char === EOF) {
    return data(char);
  }
  if (/^[\t\n\f\r ]$/.test(char) || char === '=') {
    return beforeAttributeName;
  }
  if (char === '/') {
    return selfClosingStartTag;
  }
  if (char === '>') {
    // No attribute is pending here, so the tag can be emitted directly.
    emit(currentToken);
    return data;
  }
  currentAttribute = {
    name: '',
    value: ''
  };
  return attributeName(char);
}

/**
 * Attribute-name state: accumulates the name of the current attribute.
 *
 * @param {string|symbol} char - Current character, or EOF.
 * @returns {Function|undefined} `beforeAttributeValue` on '='; the result of
 *   `afterAttributeName(char)` on whitespace, '/', '>' or EOF; otherwise
 *   `attributeName`, after appending the character (NUL is replaced by
 *   U+FFFD).
 */
function attributeName(char) {
  if (char === '=') {
    return beforeAttributeValue;
  }
  if (char === EOF || char === '/' || char === '>' || /^[\t\n\f\r ]$/.test(char)) {
    return afterAttributeName(char);
  }
  currentAttribute.name += char === '\u0000' ? '\uFFFD' : char;
  return attributeName;
}

/**
 * After-attribute-name state: an attribute name is complete but no '=' has
 * been seen yet. Reached only from `attributeName`, so `currentAttribute`
 * is the attribute that was just named.
 *
 * @param {string|symbol} char - Current character, or EOF.
 * @returns {Function|undefined} Stays here on whitespace;
 *   `beforeAttributeValue` on '='; the result of `data(EOF)` on EOF. In all
 *   other cases the attribute is stored on the token with its (empty) value,
 *   then: '/' goes to `selfClosingStartTag`, '>' emits the token and goes to
 *   `data`, and any other character begins the next attribute.
 */
function afterAttributeName(char) {
  if (char === EOF) {
    return data(char);
  }
  if (/^[\t\n\f\r ]$/.test(char)) {
    return afterAttributeName;
  }
  if (char === '=') {
    return beforeAttributeValue;
  }
  // Attribute without a value, e.g. "disabled".
  currentToken[currentAttribute.name] = currentAttribute.value;
  if (char === '/') {
    return selfClosingStartTag;
  }
  if (char === '>') {
    emit(currentToken);
    return data;
  }
  currentAttribute = {
    name: '',
    value: ''
  };
  return attributeName(char);
}
/**
 * Before-attribute-value state: right after the '=' of an attribute.
 *
 * @param {string|symbol} char - Current character, or EOF.
 * @returns {Function|undefined} The matching quoted-value state on '"' or
 *   "'"; stays here on whitespace and on '/'; on '>' the attribute is stored
 *   with its empty value, the token is emitted and parsing continues in
 *   `data`; the result of `data(EOF)` on EOF; otherwise the character is
 *   handed to `unquotedAttributeValue`.
 */
function beforeAttributeValue(char) {
  // EOF is a symbol, so it has to be ruled out before any string test.
  if (char === EOF) {
    return data(char);
  }
  if (char === '"') {
    return doubleQuotedAttributeValue;
  }
  if (char === "'") {
    return singleQuotedAttributeValue;
  }
  if (char === '>') {
    // "name=" directly followed by '>': the tag ends with an empty value.
    currentToken[currentAttribute.name] = currentAttribute.value;
    emit(currentToken);
    return data;
  }
  if (char === '/' || /^[\t\n\f\r ]$/.test(char)) {
    return beforeAttributeValue;
  }
  return unquotedAttributeValue(char);
}
/**
 * After-quoted-attribute-value state: the character following the closing
 * quote of an attribute value.
 *
 * @param {string|symbol} char - Current character, or EOF.
 * @returns {Function|undefined} `beforeAttributeName` on whitespace;
 *   `selfClosingStartTag` on '/'; `data` on '>' after storing the attribute
 *   and emitting the token; the result of `data(EOF)` on EOF; otherwise the
 *   character is handed to `beforeAttributeName`, i.e. it starts the next
 *   attribute even though the separating whitespace is missing.
 */
function afterQuotedAttributeValue(char) {
  if (char === EOF) {
    return data(char);
  }
  if (/^[\t\n\f\r ]$/.test(char)) {
    return beforeAttributeName;
  }
  if (char === '/') {
    return selfClosingStartTag;
  }
  if (char === '>') {
    currentToken[currentAttribute.name] = currentAttribute.value;
    emit(currentToken);
    return data;
  }
  // The quoted value is already closed, so this character cannot belong to it.
  return beforeAttributeName(char);
}
/**
 * Double-quoted attribute value state: inside "...".
 *
 * @param {string|symbol} char - Current character, or EOF.
 * @returns {Function|undefined} `afterQuotedAttributeValue` on the closing
 *   '"', after storing the attribute on the token; the result of `data(EOF)`
 *   on EOF; otherwise stays here after appending the character to the value
 *   (NUL is replaced by U+FFFD).
 */
function doubleQuotedAttributeValue(char) {
  if (char === EOF) {
    return data(char);
  }
  if (char === '"') {
    currentToken[currentAttribute.name] = currentAttribute.value;
    return afterQuotedAttributeValue;
  }
  currentAttribute.value += char === '\u0000' ? '\uFFFD' : char;
  return doubleQuotedAttributeValue;
}
/**
 * Single-quoted attribute value state: inside '...'.
 *
 * @param {string|symbol} char - Current character, or EOF.
 * @returns {Function|undefined} `afterQuotedAttributeValue` on the closing
 *   "'", after storing the attribute on the token; the result of `data(EOF)`
 *   on EOF; otherwise stays in this state after appending the character to
 *   the value (NUL is replaced by U+FFFD).
 */
function singleQuotedAttributeValue(char) {
  if (char === EOF) {
    return data(char);
  }
  if (char === "'") {
    currentToken[currentAttribute.name] = currentAttribute.value;
    return afterQuotedAttributeValue;
  }
  currentAttribute.value += char === '\u0000' ? '\uFFFD' : char;
  // Must remain in the single-quoted state: a '"' inside '...' is ordinary
  // value text and only "'" may close the value.
  return singleQuotedAttributeValue;
}
/**
 * Unquoted attribute value state: a value written without quotes.
 *
 * @param {string|symbol} char - Current character, or EOF.
 * @returns {Function|undefined} Whitespace, '/' and '>' end the value: the
 *   attribute is stored on the token and parsing continues in
 *   `beforeAttributeName`, `selfClosingStartTag`, or (after emitting the
 *   token) `data` respectively. EOF yields the result of `data(EOF)`. Any
 *   other character is appended to the value (NUL is replaced by U+FFFD) and
 *   the state is kept.
 */
function unquotedAttributeValue(char) {
  // EOF is a symbol, so it has to be ruled out before any string test.
  if (char === EOF) {
    return data(char);
  }
  const endsValue = char === '/' || char === '>' || /^[\t\n\f\r ]$/.test(char);
  if (!endsValue) {
    currentAttribute.value += char === '\u0000' ? '\uFFFD' : char;
    return unquotedAttributeValue;
  }
  currentToken[currentAttribute.name] = currentAttribute.value;
  if (char === '/') {
    return selfClosingStartTag;
  }
  if (char === '>') {
    emit(currentToken);
    return data;
  }
  return beforeAttributeName;
}
// ---- End of attribute parsing ----
/**
 * Self-closing-start-tag state: the character after a '/' inside a tag.
 *
 * @param {string|symbol} char - Current character, or EOF.
 * @returns {Function|undefined} On '>' the token is flagged self-closing,
 *   emitted, and parsing continues in `data`; the result of `data(EOF)` on
 *   EOF; otherwise the '/' is disregarded and the character is handed to
 *   `beforeAttributeName`.
 */
function selfClosingStartTag(char) {
  if (char === '>') {
    currentToken.isSelfClosing = true;
    // Without this the element would never reach the tree.
    emit(currentToken);
    return data;
  }
  if (char === EOF) {
    return data(char);
  }
  return beforeAttributeName(char);
}
/**
 * Parses an HTML string into a simple DOM tree.
 *
 * The module-level parser state is reset first, so each call builds a fresh
 * tree instead of appending to the result of an earlier call.
 *
 * @param {string} html - Markup to parse.
 * @returns {Object} The document root ({type: 'document', children: [...]}).
 */
module.exports.parserHTML = function (html) {
  root = {type: 'document', children: []};
  stack = [root];
  currentToken = null;
  currentAttribute = null;
  currentTextNode = null;

  let state = data;
  for (const char of html) {
    state = state(char);
  }
  // Signal end of input to whichever state is current.
  state(EOF);
  console.log(root);
  return root;
};