JavaScript将包含svg图片的Html转为word显示

背景

最近有一个需求,在表格中选择数据,导出为word文档方便编辑,其中数据中包含svg标签图片,本来沟通是后端人员直接处理好,返回文件链接,前端预览下载就好了。因为svg图片在word中不能很好的显示,需要将svg格式的处理成其他格式的图片。但是后台人员在处理后发现图片内容为空,于是让前端来处理。

html-docx-js

html-docx-js 是一个用于将 HTML 内容转换为 Word 文档格式的 JavaScript 库。它提供了简单的 API,可以在浏览器环境中轻松地生成并导出 Word 文档。

测试demo

整体思路是:先将html中的svg图片序列化并转为base64编码,再用src为该base64数据的img元素替换原来的svg图片,最后导出为Word文档。代码如下:

<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Document</title>
  </head>
  <body>
    <button id="exportButton">Export to Word</button>
    <script src="https://unpkg.com/html-docx-js/dist/html-docx.js"></script>
    <script>
      document.addEventListener("DOMContentLoaded", function () {
        // 在这里执行您的 JavaScript 代码
        document.getElementById("exportButton").addEventListener("click", function () {
            // HTML 内容
            // HTML 内容
            let htmlContent = `
              <html>
                <head>
                  <title>20240422160522306</title>
                </head>
                <body style="font-family: SimSun">
                  <div>考试科目:AP English</div>
                  <div style="font-size: 14px; font-weight: normal; margin-top: 30px">
                    1、(<span style="color: red">null</span>【6405157】)5) Mr. Blake’s
                    paragraph template has
                  </div>
                  <div style="font-size: 14px">A、no topic or concluding sentence</div>
                  <div style="font-size: 14px">
                    B、two commentaries with two examples for each
                  </div>
                  <div style="font-size: 14px">
                    C、unnecessary words and vague generalizations
                  </div>
                  <div style="font-size: 14px">
                    &nbsp;&nbsp; &nbsp;<mjx-container
                      class="MathJax CtxtMenu_Attached_0"
                      jax="SVG"
                      display="true"
                      aria-label="StartFraction n plus 4 Over 10 n EndFraction"
                      sre-explorer-id="1"
                      tabindex="0"
                      role="application"
                      ctxtmenu_oldtabindex="1"
                      ctxtmenu_counter="1"
                      style="position: relative"
                    >
                      <svg
                        xmlns="http://www.w3.org/2000/svg"
                        width="6.25ex"
                        height="4.663ex"
                        role="img"
                        focusable="false"
                        viewbox="0 -1353 2762.4 2061"
                        xmlns:xlink="http://www.w3.org/1999/xlink"
                        aria-hidden="true"
                        style="vertical-align: -1.602ex"
                      >
                        <defs>
                          <path
                            id="MJX-2-TEX-I-1D45B"
                            d="M21 287Q22 293 24 303T36 341T56 388T89 425T135 442Q171 442 195 424T225 390T231 369Q231 367 232 367L243 378Q304 442 382 442Q436 442 469 415T503 336T465 179T427 52Q427 26 444 26Q450 26 453 27Q482 32 505 65T540 145Q542 153 560 153Q580 153 580 145Q580 144 576 130Q568 101 554 73T508 17T439 -10Q392 -10 371 17T350 73Q350 92 386 193T423 345Q423 404 379 404H374Q288 404 229 303L222 291L189 157Q156 26 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 17Q74 30 112 180T152 343Q153 348 153 366Q153 405 129 405Q91 405 66 305Q60 285 60 284Q58 278 41 278H27Q21 284 21 287Z"
                          ></path>
                          <path
                            id="MJX-2-TEX-N-2B"
                            d="M56 237T56 250T70 270H369V420L370 570Q380 583 389 583Q402 583 409 568V270H707Q722 262 722 250T707 230H409V-68Q401 -82 391 -82H389H387Q375 -82 369 -68V230H70Q56 237 56 250Z"
                          ></path>
                          <path
                            id="MJX-2-TEX-N-34"
                            d="M462 0Q444 3 333 3Q217 3 199 0H190V46H221Q241 46 248 46T265 48T279 53T286 61Q287 63 287 115V165H28V211L179 442Q332 674 334 675Q336 677 355 677H373L379 671V211H471V165H379V114Q379 73 379 66T385 54Q393 47 442 46H471V0H462ZM293 211V545L74 212L183 211H293Z"
                          ></path>
                          <path
                            id="MJX-2-TEX-N-31"
                            d="M213 578L200 573Q186 568 160 563T102 556H83V602H102Q149 604 189 617T245 641T273 663Q275 666 285 666Q294 666 302 660V361L303 61Q310 54 315 52T339 48T401 46H427V0H416Q395 3 257 3Q121 3 100 0H88V46H114Q136 46 152 46T177 47T193 50T201 52T207 57T213 61V578Z"
                          ></path>
                          <path
                            id="MJX-2-TEX-N-30"
                            d="M96 585Q152 666 249 666Q297 666 345 640T423 548Q460 465 460 320Q460 165 417 83Q397 41 362 16T301 -15T250 -22Q224 -22 198 -16T137 16T82 83Q39 165 39 320Q39 494 96 585ZM321 597Q291 629 250 629Q208 629 178 597Q153 571 145 525T137 333Q137 175 145 125T181 46Q209 16 250 16Q290 16 318 46Q347 76 354 130T362 333Q362 478 354 524T321 597Z"
                          ></path>
                          <path id="MJX-2-TEX-N-2062" d=""></path>
                        </defs>
                        <g
                          stroke="currentColor"
                          fill="currentColor"
                          stroke-width="0"
                          transform="scale(1,-1)"
                        >
                          <g data-mml-node="math">
                            <g
                              data-mml-node="mfrac"
                              data-semantic-type="fraction"
                              data-semantic-role="division"
                              data-semantic-id="8"
                              data-semantic-children="3,7"
                              data-semantic-speech="StartFraction n plus 4 Over 10 n EndFraction"
                            >
                              <g
                                data-mml-node="mrow"
                                data-semantic-type="infixop"
                                data-semantic-role="addition"
                                data-semantic-id="3"
                                data-semantic-children="0,2"
                                data-semantic-content="1"
                                data-semantic-parent="8"
                                transform="translate(220,676)"
                              >
                                <g
                                  data-mml-node="mi"
                                  data-semantic-type="identifier"
                                  data-semantic-role="latinletter"
                                  data-semantic-font="italic"
                                  data-semantic-annotation="clearspeak:simple"
                                  data-semantic-id="0"
                                  data-semantic-parent="3"
                                >
                                  <use data-c="1D45B" xlink:href="#MJX-2-TEX-I-1D45B"></use>
                                </g>
                                <g
                                  data-mml-node="mo"
                                  data-semantic-type="operator"
                                  data-semantic-role="addition"
                                  data-semantic-id="1"
                                  data-semantic-parent="3"
                                  data-semantic-operator="infixop,+"
                                  transform="translate(822.2,0)"
                                >
                                  <use data-c="2B" xlink:href="#MJX-2-TEX-N-2B"></use>
                                </g>
                                <g
                                  data-mml-node="mn"
                                  data-semantic-type="number"
                                  data-semantic-role="integer"
                                  data-semantic-font="normal"
                                  data-semantic-annotation="clearspeak:simple"
                                  data-semantic-id="2"
                                  data-semantic-parent="3"
                                  transform="translate(1822.4,0)"
                                >
                                  <use data-c="34" xlink:href="#MJX-2-TEX-N-34"></use>
                                </g>
                              </g>
                              <g
                                data-mml-node="mrow"
                                data-semantic-type="infixop"
                                data-semantic-role="implicit"
                                data-semantic-annotation="clearspeak:simple;clearspeak:unit"
                                data-semantic-id="7"
                                data-semantic-children="4,5"
                                data-semantic-content="6"
                                data-semantic-parent="8"
                                transform="translate(581.2,-686)"
                              >
                                <g
                                  data-mml-node="mn"
                                  data-semantic-type="number"
                                  data-semantic-role="integer"
                                  data-semantic-font="normal"
                                  data-semantic-annotation="clearspeak:simple"
                                  data-semantic-id="4"
                                  data-semantic-parent="7"
                                >
                                  <use data-c="31" xlink:href="#MJX-2-TEX-N-31"></use>
                                  <use
                                    data-c="30"
                                    xlink:href="#MJX-2-TEX-N-30"
                                    transform="translate(500,0)"
                                  ></use>
                                </g>
                                <g
                                  data-mml-node="mo"
                                  data-semantic-type="operator"
                                  data-semantic-role="multiplication"
                                  data-semantic-id="6"
                                  data-semantic-parent="7"
                                  data-semantic-added="true"
                                  data-semantic-operator="infixop,⁢"
                                  transform="translate(1000,0)"
                                >
                                  <use data-c="2062" xlink:href="#MJX-2-TEX-N-2062"></use>
                                </g>
                                <g
                                  data-mml-node="mi"
                                  data-semantic-type="identifier"
                                  data-semantic-role="latinletter"
                                  data-semantic-font="italic"
                                  data-semantic-annotation="clearspeak:simple"
                                  data-semantic-id="5"
                                  data-semantic-parent="7"
                                  transform="translate(1000,0)"
                                >
                                  <use data-c="1D45B" xlink:href="#MJX-2-TEX-I-1D45B"></use>
                                </g>
                              </g>
                              <rect width="2522.4" height="60" x="120" y="220"></rect>
                            </g>
                          </g>
                        </g>
                      </svg>
                    </mjx-container>
                  </div>
                  <br />
                  <div style="font-size: 14px">答案:D</div>
                  <div style="font-size: 14px">
                    答案解析:The correct answer is d. two examples with two commentaries on
                    each because Mr. Blake's paragraph template includes two examples along
                    with two commentaries for each example. This structure allows for the
                    presentation of evidence or examples followed by analysis or commentary to
                    support the main point or argument of the paragraph. The other options,
                    such as no topic or concluding sentence, two commentaries with two
                    examples for each, or unnecessary words and vague generalizations, do not
                    accurately describe the structure and content of Mr. Blake's paragraph
                    template.
                  </div>
                  <div style="color: red; font-size: 14px">
                    考核知识点:【202308版】Lecture 5>>Development of Ideas>>
                  </div>
                </body>
              </html>
            `;

            // Swap every inline <svg> for a base64 <img>, then let html-docx-js
            // turn the resulting markup into a Word blob.
            htmlContent = replaceSVGWithImage(htmlContent);
            const docxBlob = htmlDocx.asBlob(htmlContent);

            // Download the generated file through a temporary anchor element.
            const anchor = Object.assign(document.createElement("a"), {
              href: window.URL.createObjectURL(docxBlob),
              download: "document.docx",
            });
            anchor.click();

            /**
             * Escapes every character that has a special meaning inside a regular
             * expression so the text can be embedded in a RegExp as a literal.
             *
             * @param {string} string - Text to escape.
             * @returns {string} The text with each metacharacter prefixed by a backslash.
             */
            function escapeRegExp(string) {
              const metaCharacters = /[.*+?^${}()|[\]\\]/g;
              // "$&" re-inserts the matched character right after the added backslash.
              const escaped = string.replace(metaCharacters, "\\$&");
              return escaped;
            }

            /**
             * Replaces every inline <svg> in an HTML string with an <img> whose src is
             * a base64 data URI of the serialized SVG.
             *
             * The markup is parsed by assigning it to the innerHTML of a temporary
             * <div>. That <div> is appended to document.body while the SVG sizes are
             * read and is removed again before returning, even if conversion throws.
             *
             * @param {string} html - HTML markup that may contain <svg> elements.
             * @returns {string} innerHTML of the temporary container after replacement.
             */
            function replaceSVGWithImage(html) {
              const container = document.createElement("div");
              container.innerHTML = html;
              // Attach the container so getComputedStyle below can report rendered sizes.
              document.body.appendChild(container);

              try {
                const serializer = new XMLSerializer();

                container.querySelectorAll("svg").forEach(function (svgElement) {
                  const imgElement = document.createElement("img");

                  // btoa only accepts Latin-1 input, so the serialized SVG is first
                  // turned into a UTF-8 byte string via encodeURIComponent + unescape.
                  const svgMarkup = serializer.serializeToString(svgElement);
                  imgElement.src =
                    "data:image/svg+xml;base64," +
                    btoa(unescape(encodeURIComponent(svgMarkup)));

                  // Read the size while the SVG is still part of the document.
                  const computedStyle = window.getComputedStyle(svgElement);
                  const width = Number.parseInt(computedStyle.width, 10);
                  const height = Number.parseInt(computedStyle.height, 10);
                  // Skip non-numeric values such as "auto" instead of assigning NaN.
                  if (Number.isFinite(width)) {
                    imgElement.width = width;
                  }
                  if (Number.isFinite(height)) {
                    imgElement.height = height;
                  }

                  svgElement.parentNode.replaceChild(imgElement, svgElement);
                });

                return container.innerHTML;
              } finally {
                // Never leave the scratch container in the page, even after an error.
                document.body.removeChild(container);
              }
            }
          });
      });
    </script>
  </body>
</html>

成功导出,效果如下:

集成到vue项目中 

项目是vue2.0的技术栈,想着改下写法就大功告成了,代码如下:

import htmlDocx from 'html-docx-js';

/**
 * Replaces every inline <svg> in an HTML string with an <img> whose src is a
 * base64 data URI of the serialized SVG.
 *
 * The markup is parsed by assigning it to the innerHTML of a temporary <div>.
 * That <div> is appended to document.body while the SVG sizes are read and is
 * always removed again before returning, even if conversion throws.
 *
 * @param {string} htmlContent - HTML markup that may contain <svg> elements.
 * @returns {string|undefined} innerHTML of the temporary container after the
 *   replacement; the original string unchanged when it contains no <svg>;
 *   undefined when htmlContent is not a string.
 */
export const handleSvgToBase64 = (htmlContent) => {
  if (typeof htmlContent !== 'string') return undefined;

  const container = document.createElement('div');
  container.innerHTML = htmlContent;
  // Attach the container so getComputedStyle below can report rendered sizes.
  document.body.appendChild(container);

  try {
    const svgElements = container.querySelectorAll('svg');
    // Nothing to convert: hand the markup back instead of returning undefined.
    if (svgElements.length === 0) return htmlContent;

    const serializer = new XMLSerializer();
    svgElements.forEach((svgElement) => {
      const imgElement = document.createElement('img');

      // btoa only accepts Latin-1 input, so the serialized SVG is first turned
      // into a UTF-8 byte string via encodeURIComponent + unescape.
      const svgMarkup = serializer.serializeToString(svgElement);
      imgElement.src =
        'data:image/svg+xml;base64,' + btoa(unescape(encodeURIComponent(svgMarkup)));

      // Read the size while the SVG is still part of the document.
      const computedStyle = window.getComputedStyle(svgElement);
      const width = Number.parseInt(computedStyle.width, 10);
      const height = Number.parseInt(computedStyle.height, 10);
      // Skip non-numeric values such as "auto" instead of assigning NaN.
      if (Number.isFinite(width)) {
        imgElement.width = width;
      }
      if (Number.isFinite(height)) {
        imgElement.height = height;
      }

      svgElement.parentNode.replaceChild(imgElement, svgElement);
    });

    return container.innerHTML;
  } finally {
    // Never leave the scratch container in the page, whatever path we exit on.
    document.body.removeChild(container);
  }
};

/**
 * Converts HTML markup to a Word blob with html-docx-js and downloads it
 * through a temporary anchor element.
 *
 * @param {string} modifiedHtmlContent - HTML markup to convert.
 * @param {string} [title='document'] - Download file name without extension.
 * @param {string} [type='docx'] - Currently unused; the file is always saved as .docx.
 * @returns {Promise<void>} Resolves once the download has been triggered.
 */
export const htmlContentToDocx = async (modifiedHtmlContent, title = 'document', type = 'docx') => {
  const converted = htmlDocx.asBlob(modifiedHtmlContent);

  const link = document.createElement('a');
  link.href = window.URL.createObjectURL(converted);
  // Honor the caller-supplied title instead of a hard-coded file name.
  link.download = `${title}.docx`;
  link.click();
};

vue组件文件中使用

import { handleSvgToBase64, htmlContentToDocx } from '@/utils/handleHtmlToDocx';

method: {
    /**
     * @description: 选择试题导出word文件
     * @return {*}
    */
    async onExportWord() {
        if (!this.selectAllList || !this.selectAllList.length) {
          this.$message.warning('请选择试题!');
          return false;
        }
        this.exportLoading = true;
        const questionIds = this.selectAllList.map((x) => x.id);
        const param = {
          subjectId: this.getClassListParams.subjectId,
          questionIds,
        };
        try {
          const { code, data } = await this.$api.tiku.getQuestionContent(param);
          if (code !== 200 || data === null) return false;

          let modifiedHtmlContent = await handleSvgToBase64(data);
          // 将 HTML 内容转换为 Word 文档
          let title = `${this.getClassListParams.subjectId}-${new Date().getTime()}`
          htmlContentToDocx(modifiedHtmlContent, title, 'docx');
        } finally {
          this.exportLoading = false;
        }
      }
}

结果却是出乎意料:

下面是ChatGPT的回答

html-to-docx

听老哥的,换成html-to-docx,改下写法

import htmlToDocx from 'html-to-docx';

/**
 * Replaces every inline <svg> in an HTML string with an <img> whose src is a
 * base64 data URI of the serialized SVG.
 *
 * The markup is parsed by assigning it to the innerHTML of a temporary <div>.
 * That <div> is appended to document.body while the SVG sizes are read and is
 * always removed again before returning, even if conversion throws.
 *
 * @param {string} htmlContent - HTML markup that may contain <svg> elements.
 * @returns {string|undefined} innerHTML of the temporary container after the
 *   replacement; the original string unchanged when it contains no <svg>;
 *   undefined when htmlContent is not a string.
 */
export const handleSvgToBase64 = (htmlContent) => {
  if (typeof htmlContent !== 'string') return undefined;

  const container = document.createElement('div');
  container.innerHTML = htmlContent;
  // Attach the container so getComputedStyle below can report rendered sizes.
  document.body.appendChild(container);

  try {
    const svgElements = container.querySelectorAll('svg');
    // Nothing to convert: hand the markup back instead of returning undefined.
    if (svgElements.length === 0) return htmlContent;

    const serializer = new XMLSerializer();
    svgElements.forEach((svgElement) => {
      const imgElement = document.createElement('img');

      // btoa only accepts Latin-1 input, so the serialized SVG is first turned
      // into a UTF-8 byte string via encodeURIComponent + unescape.
      const svgMarkup = serializer.serializeToString(svgElement);
      imgElement.src =
        'data:image/svg+xml;base64,' + btoa(unescape(encodeURIComponent(svgMarkup)));

      // Read the size while the SVG is still part of the document.
      const computedStyle = window.getComputedStyle(svgElement);
      const width = Number.parseInt(computedStyle.width, 10);
      const height = Number.parseInt(computedStyle.height, 10);
      // Skip non-numeric values such as "auto" instead of assigning NaN.
      if (Number.isFinite(width)) {
        imgElement.width = width;
      }
      if (Number.isFinite(height)) {
        imgElement.height = height;
      }

      svgElement.parentNode.replaceChild(imgElement, svgElement);
    });

    return container.innerHTML;
  } finally {
    // Never leave the scratch container in the page, whatever path we exit on.
    document.body.removeChild(container);
  }
};

/**
 * Converts HTML markup to a .docx file with html-to-docx and downloads it
 * through a temporary anchor element.
 *
 * @param {string} modifiedHtmlContent - HTML markup to convert.
 * @param {string} [title='document'] - Download file name without extension.
 * @param {string} [type='docx'] - Currently unused; the file is always saved as .docx.
 * @returns {Promise<void>} Resolves once the download has been triggered.
 */
export const htmlContentToDocx = async (modifiedHtmlContent, title = 'document', type = 'docx') => {
  const docxData = await htmlToDocx(modifiedHtmlContent);

  // Wrap the converter output in a Blob carrying the Word MIME type.
  const docxBlob = new Blob([docxData], {
    type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  });

  const anchor = Object.assign(document.createElement('a'), {
    href: window.URL.createObjectURL(docxBlob),
    download: `${title}.docx`,
  });
  anchor.click();
};

导出成功了,但是又出现了问题,就是部分样式格式出现了问题,查看html是没有问题的...

最终方案

最终尝试了下直接以blob导出为html形式的.docx文件,发现效果还可以,也能满足预览、编辑、复制这些需求,于是最终采用了这种方式。


/**
 * Replaces every inline <svg> in an HTML string with an <img> whose src is a
 * base64 data URI of the serialized SVG.
 *
 * The markup is parsed by assigning it to the innerHTML of a temporary <div>.
 * That <div> is appended to document.body while the SVG sizes are read and is
 * always removed again before returning, even if conversion throws.
 *
 * @param {string} htmlContent - HTML markup that may contain <svg> elements.
 * @returns {string|undefined} innerHTML of the temporary container after the
 *   replacement; the original string unchanged when it contains no <svg>;
 *   undefined when htmlContent is not a string.
 */
export const handleSvgToBase64 = (htmlContent) => {
  if (typeof htmlContent !== 'string') return undefined;

  const container = document.createElement('div');
  container.innerHTML = htmlContent;
  // Attach the container so getComputedStyle below can report rendered sizes.
  document.body.appendChild(container);

  try {
    const svgElements = container.querySelectorAll('svg');
    // Nothing to convert: hand the markup back instead of returning undefined.
    if (svgElements.length === 0) return htmlContent;

    const serializer = new XMLSerializer();
    svgElements.forEach((svgElement) => {
      const imgElement = document.createElement('img');

      // btoa only accepts Latin-1 input, so the serialized SVG is first turned
      // into a UTF-8 byte string via encodeURIComponent + unescape.
      const svgMarkup = serializer.serializeToString(svgElement);
      imgElement.src =
        'data:image/svg+xml;base64,' + btoa(unescape(encodeURIComponent(svgMarkup)));

      // Read the size while the SVG is still part of the document.
      const computedStyle = window.getComputedStyle(svgElement);
      const width = Number.parseInt(computedStyle.width, 10);
      const height = Number.parseInt(computedStyle.height, 10);
      // Skip non-numeric values such as "auto" instead of assigning NaN.
      if (Number.isFinite(width)) {
        imgElement.width = width;
      }
      if (Number.isFinite(height)) {
        imgElement.height = height;
      }

      svgElement.parentNode.replaceChild(imgElement, svgElement);
    });

    return container.innerHTML;
  } finally {
    // Never leave the scratch container in the page, whatever path we exit on.
    document.body.removeChild(container);
  }
};

/**
 * Downloads the HTML markup as-is, wrapped in a text/html Blob but saved with
 * a .docx file name, through a temporary anchor element.
 *
 * @param {string} modifiedHtmlContent - HTML markup to save.
 * @param {string} [title='document'] - Download file name without extension.
 * @param {string} [type='docx'] - Currently unused; the file is always saved as .docx.
 * @returns {Promise<void>} Resolves once the download has been triggered.
 */
export const htmlContentToDocx = async (modifiedHtmlContent, title = 'document', type = 'docx') => {
  // No conversion step: the file content is plain HTML.
  const htmlBlob = new Blob([modifiedHtmlContent], { type: 'text/html' });

  // The extension is .docx (not .html) even though the payload is HTML.
  const anchor = Object.assign(document.createElement('a'), {
    href: window.URL.createObjectURL(htmlBlob),
    download: `${title}.docx`,
  });
  anchor.click();
};

效果如下:

总结 

只能说是用折中的方式实现了需求,至于html-to-docx导出格式错乱的问题并没有解决,希望有大佬看到可以指导下原因。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值