浏览器工作原理——动手实现一个toy-browser（二）:生成DOM树和计算CSS

最新推荐文章于 2024-07-29 21:07:57 发布

Blateyang

最新推荐文章于 2024-07-29 21:07:57 发布

阅读量437

点赞数

分类专栏： Web学习文章标签： javascript 浏览器工作原理

本文链接：https://blog.csdn.net/Blateyang/article/details/116567938

版权

Web学习专栏收录该内容

19 篇文章 3 订阅

订阅专栏

接上一篇博客浏览器工作原理——动手实现一个toy-browser（一），本篇主要介绍浏览器工作流程的HTML解析和CSS计算环节。

文章目录

1 浏览器工作原理——解析HTML生成DOM树

1.1 HTML parser模块的文件拆分

设计HTML parser模块的使用接口
为便于文件管理，将parser模块拆分到其它文件中

1.2 用FSM实现HTML的分析

HTML标准中已经规定了HTML分析的各种状态，toy-browser只需要实现其中最基础的部分
用FSM分析HTML的框架如下

/**
 * Parse an HTML string with the tokenizer state machine and return the DOM root.
 *
 * Each state is a function that receives one character and returns the next
 * state; tokens are handed to emit(), which builds the tree on `stack`.
 *
 * @param {string} html - HTML source text.
 * @returns {object} The document node that sits at the bottom of `stack`.
 */
module.exports.parseHTML = function parseHTML(html) {
  let state = data
  for(const c of html) {
    state = state(c)
  }
  state(EOF) // feed the end-of-file sentinel so the state machine can finish
  // The document node is always the bottom entry of the stack. Returning it
  // directly (instead of stack.pop()) leaves the stack intact and still yields
  // the root when some element was left unclosed.
  return stack[0]
}

1.3 解析标签

总共有3种标签：开始标签、结束标签和自封闭标签
解析HTML时的状态迁移画出图来比较直观（状态迁移图如下）

对应代码（1.1~1.5相关代码）如下

// End-of-input sentinel passed to the state functions after the last character.
// It is a Symbol, so it is never equal to any character of the HTML text.
const EOF = Symbol('EOF')

/**
 * Initial tokenizer state.
 *
 * @param {string|symbol} c - Current character, or the EOF sentinel.
 * @returns {Function|undefined} tagOpen after "<", nothing at EOF, otherwise
 *   whatever textNode returns after consuming `c` as the first text character.
 */
function data(c) {
  if(c === EOF) {
    return
  }
  if(c === "<") {
    return tagOpen
  }
  // Any other character starts a fresh text token and is consumed by textNode.
  currentToken = { type: "text", content: "" }
  return textNode(c)
}

/**
 * Text state: accumulates characters into the current text token.
 *
 * @param {string|symbol} c - Current character, or the EOF sentinel.
 * @returns {Function|undefined} textNode while text continues, tagOpen after
 *   "<", nothing at EOF.
 */
function textNode(c) {
  if(c === EOF) {
    // EOF is a Symbol: appending it to a string would throw a TypeError, and
    // the pending text would never reach the tree. Flush the token instead.
    emit(currentToken)
    return
  }
  if(c === "<") {
    emit(currentToken)
    return tagOpen
  }
  currentToken.content += c
  return textNode
}

/**
 * State right after "<".
 *
 * @param {string|symbol} c - Current character, or the EOF sentinel.
 * @returns {Function|undefined} endTagOpen after "/", doctypeCommentTag after
 *   "!", the result of tagName(c) for a letter, nothing for illegal input.
 */
function tagOpen(c) {
  if(c === EOF) {
    // The EOF sentinel is a Symbol and has no match() method; bail out before
    // the regular-expression test below can throw.
    return
  }
  if(c === '/') {
    return endTagOpen
  }
  if(c.match(/^[a-zA-Z]$/)) {
    currentToken = {
      type: "startTag",
      tagName: ""
    }
    return tagName(c)
  }
  if(c === '!') {
    return doctypeCommentTag
  }
  return // illegal html
}

/**
 * State after "<!": skips <!DOCTYPE xxx> and <!-- xxx --> nodes.
 *
 * Every character is discarded; the first ">" ends the skipped node.
 *
 * @param {string|symbol} c - Current character, or the EOF sentinel.
 * @returns {Function} data after ">", otherwise this same state.
 */
function doctypeCommentTag(c) {
  return c === '>' ? data : doctypeCommentTag
}

/**
 * State right after "</".
 *
 * @param {string|symbol} c - Current character, or the EOF sentinel.
 * @returns {Function|undefined} The result of tagName(c) for a letter, nothing
 *   for illegal input.
 */
function endTagOpen(c) {
  // The EOF sentinel is a Symbol without a match() method, so it has to be
  // rejected before the regular-expression test.
  if(c === EOF || !c.match(/^[a-zA-Z]$/)) {
    return // illegal html
  }
  currentToken = {
    type: "endTag",
    tagName: ""
  }
  return tagName(c)
}

/**
 * Tag-name state: accumulates the name of the current start/end tag token.
 *
 * @param {string|symbol} c - Current character, or the EOF sentinel.
 * @returns {Function|undefined} beforeAttributeName after whitespace, tagName
 *   while the name continues, selfClosingTag after "/", data after ">",
 *   nothing for illegal input.
 */
function tagName(c) {
  if(c === EOF) {
    // The EOF sentinel is a Symbol and has no match() method.
    return // illegal html
  }
  if(c.match(/[\t\n\f ]/)) { // \f is the form-feed character
    return beforeAttributeName
  }
  // Digits and hyphens are valid inside a tag name (h1, my-element); the first
  // character is already guaranteed to be a letter by tagOpen/endTagOpen.
  if(c.match(/^[a-zA-Z0-9-]$/)) {
    currentToken.tagName += c
    return tagName
  }
  if(c === '/') {
    return selfClosingTag
  }
  if(c === '>') {
    emit(currentToken)
    return data
  }
  return // illegal html
}

/**
 * State before an attribute name (after the tag name or a finished attribute).
 *
 * @param {string|symbol} c - Current character, or the EOF sentinel.
 * @returns {Function|undefined} This state while skipping whitespace, the
 *   result of afterAttributeName(c) for "/", ">" or EOF, nothing for "=",
 *   otherwise the result of attributeName(c) for a freshly started attribute.
 */
function beforeAttributeName(c) {
  // Compare against EOF before calling c.match(): the sentinel is a Symbol and
  // has no match() method, so the regular-expression test must not see it.
  if(c === '/' || c === '>' || c === EOF) {
    return afterAttributeName(c)
  }
  if(c.match(/[\t\n\f ]/)) {
    return beforeAttributeName
  }
  if(c === '=') {
    return // illegal
  }
  // Start collecting a new attribute.
  currentAttribute = {
    name: '',
    value: ''
  }
  return attributeName(c)
}

/**
 * State after an attribute name.
 *
 * @param {string|symbol} c - Current character, or the EOF sentinel.
 * @returns {Function|undefined} selfClosingTag after "/", data after ">",
 *   beforeAttributeValue after "=", this state while skipping whitespace,
 *   nothing at EOF, otherwise the result of attributeName(c) for a new
 *   attribute.
 */
function afterAttributeName(c) {
  if(c === EOF) {
    // Checked first: the sentinel is a Symbol and has no match() method.
    return
  }
  if(c === '/') {
    return selfClosingTag
  }
  if(c === '>') {
    emit(currentToken)
    return data
  }
  if(c === '=') {
    return beforeAttributeValue
  }
  if(c.match(/[\t\n\f ]/)) {
    return afterAttributeName
  }
  // Any other character begins the next attribute (e.g. `<input disabled
  // class="x">`). As on the ">" path above, the preceding valueless attribute
  // is not copied onto the token; it is simply replaced by the new one.
  currentAttribute = {
    name: '',
    value: ''
  }
  return attributeName(c)
}

/**
 * Attribute-name state: accumulates the name of the current attribute.
 *
 * @param {string|symbol} c - Current character, or the EOF sentinel.
 * @returns {Function|undefined} beforeAttributeValue after "=", the result of
 *   afterAttributeName(c) for whitespace, "/", ">" or EOF, nothing for an
 *   illegal character, otherwise this state.
 */
function attributeName(c) {
  if(c === '=') {
    return beforeAttributeValue
  }
  // EOF is compared before c.match() runs: the sentinel is a Symbol and has no
  // match() method, so the short-circuit keeps the regex away from it.
  if(c === EOF || c === '/' || c === '>' || c.match(/[\t\n\f ]/)) {
    return afterAttributeName(c)
  }
  if(c === '\u0000' || c === '"' || c === '\'' || c === '<') {
    return // illegal; \u0000 is the NUL character
  }
  currentAttribute.name += c
  return attributeName
}

/**
 * State after "=": decides how the attribute value is delimited.
 *
 * @param {string|symbol} c - Current character, or the EOF sentinel.
 * @returns {Function|undefined} This state while skipping whitespace, the
 *   double/single-quoted state after a quote, nothing for "/", ">" or EOF,
 *   otherwise the result of unquotedAttributeValue(c).
 */
function beforeAttributeValue(c) {
  // EOF first: the sentinel is a Symbol and has no match() method.
  if(c === EOF || c === '/' || c === '>') {
    return // illegal
  }
  if(c.match(/[\t\n\f ]/)) {
    return beforeAttributeValue
  }
  if(c === '"') {
    return doubleQuotedAttributeValue
  }
  if(c === '\'') {
    return singleQuotedAttributeValue
  }
  // `c` is already the first character of an unquoted value, so it has to be
  // consumed by the unquoted state rather than skipped.
  return unquotedAttributeValue(c)
}

/**
 * State inside a double-quoted attribute value.
 *
 * @param {string|symbol} c - Current character, or the EOF sentinel.
 * @returns {Function|undefined} afterQuotedAttributeValue once the closing
 *   quote is seen, nothing for NUL or EOF, otherwise this state.
 */
function doubleQuotedAttributeValue(c) {
  switch(c) {
    case '"':
      // Closing quote: the attribute is complete, copy it onto the token.
      currentToken[currentAttribute.name] = currentAttribute.value
      return afterQuotedAttributeValue
    case '\u0000':
    case EOF:
      return undefined // illegal
    default:
      currentAttribute.value += c
      return doubleQuotedAttributeValue
  }
}

/**
 * State inside a single-quoted attribute value.
 *
 * @param {string|symbol} c - Current character, or the EOF sentinel.
 * @returns {Function|undefined} afterQuotedAttributeValue once the closing
 *   quote is seen, nothing for NUL or EOF, otherwise this state.
 */
function singleQuotedAttributeValue(c) {
  const isClosingQuote = c === '\''
  if(isClosingQuote) {
    // The attribute is complete: copy it onto the token.
    const { name, value } = currentAttribute
    currentToken[name] = value
    return afterQuotedAttributeValue
  }
  if(c === '\u0000' || c === EOF) {
    return undefined // illegal
  }
  currentAttribute.value += c
  return singleQuotedAttributeValue
}

/**
 * State right after the closing quote of an attribute value.
 *
 * @param {string|symbol} c - Current character, or the EOF sentinel.
 * @returns {Function|undefined} beforeAttributeName after whitespace,
 *   selfClosingTag after "/", data after ">", nothing at EOF, otherwise the
 *   result of beforeAttributeName(c).
 */
function afterQuotedAttributeValue(c) {
  if(c === EOF) {
    // Checked first: the sentinel is a Symbol and has no match() method.
    return // illegal
  }
  if(c.match(/[\t\n\f ]/)) {
    return beforeAttributeName
  }
  if(c === '/') {
    return selfClosingTag
  }
  if(c === '>') {
    // The quoted-value states already stored the attribute; storing it again
    // here is harmless and keeps this path self-sufficient.
    currentToken[currentAttribute.name] = currentAttribute.value
    emit(currentToken)
    return data
  }
  // Missing whitespace between attributes (`a="x"b="y"`). The value has
  // already been closed and stored, so `c` starts the next attribute; it must
  // not be appended to the previous value or forced into the double-quoted
  // state, which would be wrong for single-quoted values in particular.
  return beforeAttributeName(c)
}

/**
 * State inside an unquoted attribute value.
 *
 * The attribute is copied onto the token on every path that ends the value
 * (whitespace, "/" and ">"), because no other state stores it afterwards.
 *
 * @param {string|symbol} c - Current character, or the EOF sentinel.
 * @returns {Function|undefined} beforeAttributeName after whitespace,
 *   selfClosingTag after "/", data after ">", nothing for illegal input or
 *   EOF, otherwise this state.
 */
function unquotedAttributeValue(c) {
  if(c === EOF) {
    // Checked first: the sentinel is a Symbol and has no match() method.
    return // illegal
  }
  if(c.match(/[\t\n\f ]/)) {
    currentToken[currentAttribute.name] = currentAttribute.value
    return beforeAttributeName
  }
  if(c === '/') {
    currentToken[currentAttribute.name] = currentAttribute.value
    return selfClosingTag
  }
  if(c === '>') {
    currentToken[currentAttribute.name] = currentAttribute.value
    emit(currentToken)
    return data
  }
  if(c === '\u0000' || c === '"' || c === '\'' || c === '<' || c === '=') {
    return // illegal
  }
  currentAttribute.value += c
  return unquotedAttributeValue
}
/**
 * State after "/" inside a tag: only ">" may follow.
 *
 * @param {string|symbol} c - Current character, or the EOF sentinel.
 * @returns {Function|undefined} data after ">" (the token is flagged as
 *   self-closing and emitted first), nothing for illegal input.
 */
function selfClosingTag(c) {
  if(c !== '>') {
    return undefined // illegal html
  }
  currentToken.isSelfClosing = true
  emit(currentToken)
  return data
}

1.4 创建元素

在状态转移的过程中要加入业务逻辑
在标签结束状态提交创建的标签token，通过上图中的emit实现

1.5 处理属性

属性分为单引号、双引号和无引号，需要较多状态处理
处理属性的方式跟标签类似
属性结束时把属性加到标签Token上

1.6 用Token构建DOM树

根据标签构建DOM树的基本思想是使用栈（HTML的标签层层嵌套，符合栈先入后出的操作特点）
遇到开始标签时创建元素并入栈，遇到结束标签时出栈
自封闭标签可视作入栈后立即出栈，
即将其直接添加到当前栈顶元素的children列表中
任何元素的父元素是它入栈前的栈顶

1.7 将文本节点加到DOM树

文本字符需要合并为文本节点
文本节点与自封闭标签处理类似

1.6和1.7相关代码如下

/**
 * Turn a token into a DOM node and attach it to the tree being built on `stack`.
 *
 * - startTag: creates an element under the current stack top, computes its CSS
 *   immediately and pushes it unless the tag is self-closing.
 * - endTag: pops the stack when the name matches the stack top; a closing
 *   </style> first hands its text content to addCSSRules.
 * - text: appends a text node to the current stack top.
 *
 * NOTE(review): the tokenizer stores attributes as plain keys on the token, so
 * an attribute literally named `type` or `tagName` would overwrite the token's
 * own field before it gets here — verify against the tokenizer.
 *
 * @param {object} token - Token produced by the tokenizer states.
 * @returns {undefined}
 */
function emit(token) {
  const top = stack[stack.length - 1]
  if(token.type === "startTag") {
    // Create the node for a start tag.
    const element = {
      type: "element",
      tagName: token.tagName,
      attributes: [],
      children: [],
      parent: top
    }
    // Every token key other than the bookkeeping fields is an attribute.
    for(const key of Object.keys(token)) {
      if(key !== "type" && key !== "tagName" && key !== "isSelfClosing") {
        element.attributes.push({name: key, value: token[key]})
      }
    }
    computeCSS(element) // CSS is computed as soon as the start tag is seen
    if(!token.isSelfClosing) {
      stack.push(element) // a start tag stays open until its end tag arrives
    }
    top.children.push(element)
  }else if(token.type === "endTag") {
    if(token.tagName !== top.tagName) {
      console.log(token.tagName + " doesn't match top element of stack!")
      return
    }
    // Collect the CSS rules once the style element is complete. An empty
    // <style></style> has no text child, so guard before reading its content.
    if(token.tagName === "style") {
      const firstChild = top.children[0]
      if(firstChild && firstChild.type === "text") {
        addCSSRules(firstChild.content)
      }
    }
    stack.pop()
  }else if(token.type === "text") {
    currentTextNode = {
      type: "text",
      content: token.content,
      parent: top
    }
    top.children.push(currentTextNode)
  }
}

2 浏览器工作原理——CSS计算

2.1 收集CSS规则

遇到style标签，把css规则保存起来(暂不考虑link标签中的样式表)
为简化实现难度，使用node的css库中的CSS Parser来解析CSS规则
注意css库分析CSS规则的格式

2.2 添加CSS计算调用

当创建一个元素后，立即计算CSS（CSS设计的一条潜规则是CSS的所有选择器会尽量保证在startTag进入时判断是否匹配）
理论上，当分析一个元素时，所有CSS规则已经收集完毕（为简化实现，html标签的内联样式暂不考虑）
在真实浏览器中，可能遇到写在body中的style标签，需重新计算CSS，暂不考虑

2.3 获取父元素

在computeCSS函数中，必须知道元素的所有父元素才能判断元素与规则是否匹配
在解析HTML生成DOM树步骤中的stack，可以获取当前元素的所有父元素
因为首先获取和处理的是“当前元素”，所以获得和计算父元素匹配的顺序是从内向外（div div #myid)

2.4 选择器与元素匹配

选择器也要从当前元素向外排列
复杂选择器拆成单个元素的选择器，用循环匹配父元素队列

2.5 计算选择器与元素匹配

根据选择器的类型和元素属性，计算当前元素是否与规则匹配
视频教程中仅实现了三种简单选择器（元素选择器、类选择器和id选择器），实际还有更复杂的复合选择器、复杂选择器等
作业：跟上课程进度，实现复合选择器和支持空格的class选择器(使用正则)

2.6 生成computed属性

对于元素匹配到的规则，为元素生成相应的计算属性

2.7 specificity的计算逻辑

specificity被译作优先级或特定度，用来确定多条CSS规则作用于同一元素时的覆盖顺序

CSS规则是根据specificity和后来优先规则覆盖
specificity用一个四元组(inline, id, class, tag)表示，越左边权重越高
一个CSS规则的specificity根据包含的简单选择器相加而成
例如：
div div #myId的specificity: (0, 1, 0, 2)小于.cls #myId的specificity: (0, 1, 1, 0)

2.1~2.7相关代码

const css = require("css")
// Mutable parser state shared by the tokenizer state functions and emit().
// Token currently being assembled by the tokenizer states.
let currentToken = null
// Attribute ({name, value}) currently being assembled.
let currentAttribute = null
// Text node most recently created by emit().
let currentTextNode = null
let stack = [{type: "document", children: []}] // array used as a stack (only one end is pushed/popped); the document node is its bottom entry

// All CSS rules collected so far, in source order.
let rules = []

/**
 * Parse a stylesheet text with the `css` library and append its rules to `rules`.
 *
 * @param {string} text - Raw CSS text.
 * @returns {undefined}
 */
function addCSSRules(text) {
  const { stylesheet } = css.parse(text)
  rules.push(...stylesheet.rules)
}

/**
 * Test whether an element matches a compound selector such as `#myImg` or
 * `p.text#name` (type, class and id simple selectors only).
 *
 * @param {object} element - DOM node; nodes without `attributes` (text nodes,
 *   the document) never match.
 * @param {string} selector - Compound selector without combinators.
 * @returns {boolean} true when every simple selector matches the element.
 */
function match(element, selector) {
  if(!selector || !element.attributes) {
    return false
  }
  // The "." must be literal (inside the character class), and names may
  // contain digits, "_" and "-" (h1, .my-class).
  const simpleSelectors = selector.match(/[#.]?[a-zA-Z0-9_-]+/g)
  // Reject anything the pattern does not fully cover (e.g. "*", ":hover")
  // instead of throwing on a null result or matching only a fragment.
  if(!simpleSelectors || simpleSelectors.join("") !== selector) {
    return false
  }
  const attributeValue = (name) => {
    const found = element.attributes.find(attr => attr.name === name)
    return found ? found.value : undefined
  }
  for(const simple of simpleSelectors) {
    const prefix = simple.charAt(0)
    if(prefix === "#") { // id selector
      if(attributeValue("id") !== simple.substring(1)) {
        return false
      }
    }else if(prefix === ".") { // class selector; the attribute may list several classes
      const classes = attributeValue("class")
      if(typeof classes !== "string" || !classes.split(/\s+/).includes(simple.substring(1))) {
        return false
      }
    }else if(simple !== element.tagName) { // type selector
      return false
    }
  }
  return true
}

/**
 * Compute the specificity of a selector as [inline, id, class, tag].
 *
 * Only type, class and id simple selectors are counted; the inline slot is
 * always 0 here. Combinators and repeated whitespace are ignored.
 *
 * @param {string} selector - e.g. "div div #myId" or "p.text#name".
 * @returns {number[]} Four-element specificity tuple.
 */
function computeSpecificity(selector) {
  const spec = [0, 0, 0, 0]
  // Scan the whole selector at once: this avoids the null match that a
  // per-piece scan produces for empty pieces ("div  p") or combinators (">").
  // The "." is literal and names may contain digits, "_" and "-".
  const simpleSelectors = selector.match(/[#.]?[a-zA-Z0-9_-]+/g) || []
  for(const simple of simpleSelectors) {
    const prefix = simple.charAt(0)
    if(prefix === "#") {
      spec[1]++
    }else if(prefix === ".") {
      spec[2]++
    }else if(/^[a-zA-Z]/.test(simple)) {
      spec[3]++
    }
  }
  return spec
}


/**
 * Compare two specificity tuples from the most significant slot to the least.
 *
 * @param {number[]} spec1 - [inline, id, class, tag]
 * @param {number[]} spec2 - [inline, id, class, tag]
 * @returns {number} Negative when spec1 is lower, positive when it is higher,
 *   0 when both are equal.
 */
function compareSpecificity(spec1, spec2) {
  // The first slot that differs decides; the last slot is the final tie-break.
  for(let slot = 0; slot < 3; ++slot) {
    if(spec1[slot] !== spec2[slot]) {
      return spec1[slot] - spec2[slot]
    }
  }
  return spec1[3] - spec2[3]
}

/**
 * Apply every collected CSS rule that matches `element` and store the winning
 * declarations in `element.computedStyle[property] = {value, specificity}`.
 *
 * Only the first selector of each rule is considered, and only descendant
 * (whitespace) combinators are supported.
 *
 * @param {object} element - Element just created for a start tag; its
 *   ancestors are the current contents of `stack`.
 * @returns {undefined}
 */
function computeCSS(element) {
  // Shallow copy of the stack, reversed so ancestors run from innermost to outermost.
  const ancestors = stack.slice().reverse()
  console.log("compute css for element", element)

  for(const rule of rules) {
    // Entries without selectors/declarations (comments, at-rules) cannot be matched.
    if(!rule.selectors || !rule.declarations) {
      continue
    }
    // Compound selectors, innermost first; tolerate repeated whitespace.
    const selectorPaths = rule.selectors[0].split(/\s+/).filter(Boolean).reverse()
    if(!match(element, selectorPaths[0])) {
      continue
    }
    // Walk the ancestors from the inside out, consuming one compound selector
    // per matching ancestor.
    let j = 1
    for(const ancestor of ancestors) {
      if(j < selectorPaths.length && match(ancestor, selectorPaths[j])) {
        ++j
      }
    }
    // The match result is decided per rule; it must not leak from an earlier
    // rule into this one.
    if(j < selectorPaths.length) {
      continue
    }

    console.log("Element", element, "matched rule", rule)
    if(!element.computedStyle) {
      element.computedStyle = {}
    }
    const sp = computeSpecificity(rule.selectors[0])
    for(const declaration of rule.declarations) {
      if(!element.computedStyle[declaration.property]) {
        element.computedStyle[declaration.property] = {}
      }
      const entry = element.computedStyle[declaration.property]
      // A later rule wins when its specificity is higher OR equal (<= 0).
      if(!entry.specificity || compareSpecificity(entry.specificity, sp) <= 0) {
        entry.value = declaration.value
        entry.specificity = sp
      }
    }
  }
}