phpword模版操作支持解析html标签
phpword模版替换插入html实现以及表格合并列的部分实现
phpword本身自带的模版替换类库只能替换文本、图片等变量;当变量的替换内容为html时,要么提示文件损坏,要么不能正确显示。本文主要解决替换内容为html代码时的正确显示问题:实际使用时调用格式与文本替换一致,由扩展类完成html代码的转换,如下图:
实测大部分html标签及属性可支持,没有的自行扩展phpword的html类库(PhpOffice\PhpWord\Shared\Html)。
另外,由于word表格与html表格合并列的方式不同,phpword没有支持表格纵向合并的rowspan,而实际项目涉及表格合并列,所以修改了html类库以部分实现html的rowspan属性,目前能实现第一列的rowspan正常转换。调用方式与文本替换方式一致:demo.docx内容中与普通文本替换一样,只要在替换位置写入${变量名}即可。如有多个替换变量,就使用setHtmlValues,将数据打包成以变量名为键名、html内容为键值的数组。在文件顶部引用扩展类:
use PhpOffice\PhpWord\IOFactory;
use app\common\library\TemplateProcessor;
实际代码:
// Open the template file.
$tmp = new TemplateProcessor('test/demo.docx');
// Replace a single variable: the ${testHtml} placeholder receives the rendered HTML.
$tmp->setHtmlValue('testHtml', '<p>这是<strong>测试文档</strong></p>');
// Replace several variables at once; $htmlData maps each variable name to its HTML markup.
$tmp->setHtmlValues($htmlData);
新建自定义类,文件名TemplateProcessor.php,以扩展phpword的TemplateProcessor类;实际项目为thinkphp5.1版本,实体文件放置在\application\common\library\下
<?php
namespace app\common\library;
use PhpOffice\PhpWord\Element\TextBox;
use PhpOffice\PhpWord\Shared\Html;
use PhpOffice\PhpWord\Shared\XMLWriter;
use PhpOffice\PhpWord\TemplateProcessor as TplProcessor;
use PhpOffice\PhpWord\Writer\Word2007\Element\Container;
/**
 * Template processor that can fill a template variable with rendered HTML.
 *
 * Builds on the stock PHPWord template processor: the HTML is parsed by
 * PhpOffice\PhpWord\Shared\Html, serialised with the Word2007 container writer
 * and swapped into the template through replaceXmlBlock().
 */
class TemplateProcessor extends TplProcessor
{
    /**
     * Replace the 'w:p' block that holds a variable with the XML produced from an HTML fragment.
     *
     * @param string $search Variable name handed to replaceXmlBlock(), e.g. 'testHtml' for ${testHtml}
     * @param string $markup HTML fragment handed to Html::addHtml()
     */
    public function setHtmlValue($search, $markup)
    {
        // The TextBox only serves as a container that collects the parsed elements.
        $holder = new TextBox();
        Html::addHtml($holder, $markup);

        // Serialise the collected elements into an XML string.
        $writer = new XMLWriter();
        (new Container($writer, $holder, false))->write();

        $this->replaceXmlBlock($search, $writer->getData(), 'w:p');
    }

    /**
     * Apply setHtmlValue() to every entry of an array.
     *
     * @param array $values Variable name => HTML fragment
     */
    public function setHtmlValues(array $values): void
    {
        foreach ($values as $variableName => $htmlFragment) {
            $this->setHtmlValue($variableName, $htmlFragment);
        }
    }
}
phpword修改(实现rowspan)
修改PhpOffice\PhpWord\Shared\Html.php,主要增加td的rowspan属性判断及转换
<?php
/**
* This file is part of PHPWord - A pure PHP library for reading and writing
* word processing documents.
*
* PHPWord is free software distributed under the terms of the GNU Lesser
* General Public License version 3 as published by the Free Software Foundation.
*
* For the full copyright and license information, please read the LICENSE
* file that was distributed with this source code. For the full list of
* contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
*
* @see https://github.com/PHPOffice/PHPWord
*
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
*/
namespace PhpOffice\PhpWord\Shared;
use DOMAttr;
use DOMDocument;
use DOMNode;
use DOMXPath;
use Exception;
use PhpOffice\PhpWord\Element\AbstractContainer;
use PhpOffice\PhpWord\Element\Row;
use PhpOffice\PhpWord\Element\Table;
use PhpOffice\PhpWord\Settings;
use PhpOffice\PhpWord\SimpleType\Jc;
use PhpOffice\PhpWord\SimpleType\NumberFormat;
use PhpOffice\PhpWord\Style\Paragraph;
/**
* Common Html functions.
*
* @SuppressWarnings(PHPMD.UnusedPrivateMethod) For readWPNode
*/
class Html
{
// NOTE(review): not used in the visible part of the class; presumably a running index for generated lists — confirm.
protected static $listIndex = 0;
// DOMXPath instance for the document most recently loaded by addHtml().
protected static $xpath;
// The $options argument most recently passed to addHtml().
protected static $options;
// NOTE(review): not used in the visible part of the class; presumably bookkeeping for the rowspan handling — confirm.
protected static $needRow=0;
/**
 * Add HTML parts.
 *
 * The markup is normalised so that it can be loaded as XML, wrapped in a 'body' tag
 * when it is only a fragment, and then handed node by node to parseNode().
 *
 * Note: $stylesheet parameter is removed to avoid PHPMD error for unused parameter
 * Warning: Do not pass user-generated HTML here, as that would allow an attacker to read arbitrary
 * files or perform server-side request forgery by passing local file paths or URLs in <img>.
 *
 * @param \PhpOffice\PhpWord\Element\AbstractContainer $element Where the parts need to be added
 * @param string $html The code to parse
 * @param bool $fullHTML If it's a full HTML, no need to add 'body' tag
 * @param bool $preserveWhiteSpace If false, the whitespaces between nodes will be removed
 * @param mixed $options Stored in self::$options before parsing starts; how it is consumed
 *                       is not visible in this method
 */
public static function addHtml($element, $html, $fullHTML = false, $preserveWhiteSpace = true, $options = null): void
{
    /*
     * @todo parse $stylesheet for default styles. Should result in an array based on id, class and element,
     * which could be applied when such an element occurs in the parseNode function.
     */
    self::$options = $options;

    // Preprocess: remove all line ends, then decode HTML entities into UTF-8 characters,
    // because the markup is loaded as XML and entities such as &nbsp; are undefined there.
    // The four entities that must survive (&lt; &gt; &amp; &quot;) are parked behind
    // placeholders first so the decode step cannot turn them into raw markup characters.
    $html = str_replace(["\n", "\r"], '', $html);
    $html = str_replace(['&lt;', '&gt;', '&amp;', '&quot;'], ['_lt_', '_gt_', '_amp_', '_quot_'], $html);
    $html = html_entity_decode($html, ENT_QUOTES, 'UTF-8');
    // Every ampersand still present is a literal one and has to be escaped for the XML parser.
    $html = str_replace('&', '&amp;', $html);
    $html = str_replace(['_lt_', '_gt_', '_amp_', '_quot_'], ['&lt;', '&gt;', '&amp;', '&quot;'], $html);

    // Fragments get a body element so there is a single root node to start from.
    if (false === $fullHTML) {
        $html = '<body>' . $html . '</body>';
    }

    // libxml_disable_entity_loader() is only called on PHP < 8.0; remember the previous
    // setting so it can be put back afterwards.
    $toggleEntityLoader = \PHP_VERSION_ID < 80000;
    $originalLibEntityLoader = null;
    if ($toggleEntityLoader) {
        $originalLibEntityLoader = libxml_disable_entity_loader(true);
    }

    try {
        // Load DOM
        $dom = new DOMDocument();
        $dom->preserveWhiteSpace = $preserveWhiteSpace;
        $dom->loadXML($html);
        self::$xpath = new DOMXPath($dom);
        $node = $dom->getElementsByTagName('body');
        self::parseNode($node->item(0), $element);
    } finally {
        // Restore the previous entity-loader setting even when parsing throws.
        if ($toggleEntityLoader) {
            libxml_disable_entity_loader($originalLibEntityLoader);
        }
    }
}
/**
 * Build a style array from the attributes of a DOM node.
 *
 * The presentational attributes align, lang, width, cellspacing, cellpadding, bgcolor and
 * valign are translated first. If the node carries a "style" attribute, it is passed to
 * self::parseStyle() afterwards together with the styles gathered up to that point.
 * A node that is not an element leaves $styles unchanged.
 *
 * @param DOMNode $node Node whose attributes are inspected
 * @param array $styles Styles collected so far; entries derived from this node are written into it
 *
 * @return array
 */
protected static function parseInlineStyle($node, $styles = [])
{
    // Only element nodes are inspected for attributes.
    if (XML_ELEMENT_NODE != $node->nodeType) {
        return $styles;
    }

    $nodeAttributes = $node->attributes; // all attributes of the node (e.g. id, class)
    foreach ($nodeAttributes as $attr) {
        $attrName = strtolower($attr->name);
        $attrValue = $attr->value;

        if ('align' === $attrName) {
            $styles['alignment'] = self::mapAlign(trim($attrValue));
        } elseif ('lang' === $attrName) {
            $styles['lang'] = $attrValue;
        } elseif ('width' === $attrName) {
            // tables, cells
            $isPercentage = false !== strpos($attrValue, '%');
            if ($isPercentage) {
                // e.g. <table width="100%"> or <td width="50%">
                // NOTE(review): the factor 50 presumably converts to the unit expected for
                // TblWidth::PERCENT — confirm.
                $styles['width'] = (int) $attrValue * 50;
                $styles['unit'] = \PhpOffice\PhpWord\SimpleType\TblWidth::PERCENT;
            } else {
                // e.g. <table width="250"> where "250" = 250px (always pixels)
                $styles['width'] = Converter::pixelToTwip($attrValue);
                $styles['unit'] = \PhpOffice\PhpWord\SimpleType\TblWidth::TWIP;
            }
        } elseif ('cellspacing' === $attrName) {
            // tables e.g. <table cellspacing="2">, where "2" = 2px (always pixels)
            $styles['cellSpacing'] = Converter::cssToTwip((int) $attrValue . 'px');
        } elseif ('cellpadding' === $attrName) {
            // tables e.g. <table cellpadding="2">, where "2" = 2px (always pixels)
            $styles['cellMargin'] = Converter::cssToTwip((int) $attrValue . 'px');
        } elseif ('bgcolor' === $attrName) {
            // tables, rows, cells e.g. <tr bgColor="#FF0000">
            $styles['bgColor'] = trim($attrValue, '# ');
        } elseif ('valign' === $attrName) {
            // cells e.g. <td valign="middle">; the first recognised keyword found in the value is mapped
            if (preg_match('#(?:top|bottom|middle|baseline)#i', $attrValue, $keyword)) {
                $styles['valign'] = self::mapAlignVertical($keyword[0]);
            }
        }
    }

    // The style attribute is handled after the loop, on top of the styles gathered above.
    $inlineStyle = $nodeAttributes->getNamedItem('style');
    if ($inlineStyle) {
        $styles = self::parseStyle($inlineStyle, $styles);
    }

    return $styles;
}
/**
* Parse a node and add a corresponding element to the parent element.
*
* @param DOMNode $node node to parse
* @param \PhpOffice\PhpWord\Element\AbstractContainer $element object to add an element corresponding with the node
* @param array $styles Array with all styles
* @param array $data Array to transport data to a next level in the DOM tree, for example level of listitems
*/
protected static function parseNode($node, $element, $styles = [], $data = []): void
{
// Populate styles array
$styleTypes = ['font', 'paragraph', 'list', 'table', 'row', 'cell'];
foreach ($styleTypes as $styleType) {
if (!isset($styles[$styleType])) {
$styles[$styleType] = [];
}
}
// Node mapping table
$nodes = [
// $method $node $element $styles $data $argument1 $argument2
'p' => ['Paragraph', $node, $element, $styles, null, null, null],
'h1' => ['Heading', null, $element, $styles, null, 'Heading1', null],
'h2' => ['Heading', null, $element, $styles, null, 'Heading2', null],
'h3' => ['Heading', null, $element, $styles, null, 'Heading3', null],
'h4' => ['Heading', null, $element, $styles, null, 'Heading4', null],
'h5' => ['Heading', null, $element, $styles, null, 'Heading5', null],
'h6' => ['Heading', null, $element, $styles, null, 'Heading6', null],
'#text' => ['Text', $node, $element, $styles, null, null, null],
'strong' => ['Property', null, null, $styles, null, 'bold', true],
'b' => ['Property', null, null, $styles, null, 'bold', true],
'em' => ['Property', null, null, $styles, null, 'italic', true],
'i' => ['Property', null, null, $styles, null, 'italic', true],
'u' => ['Property', null, null, $styles, null, 'underline', 'single'],
'sup' => ['Property', null