php解析word文档

一个简单的 Word 文档阅读类:使用正则表达式实现对 docx 文档的简单读取,代码如下。

<?php
class Lib {
	/** Namespace of markup-compatibility elements (mc:AlternateContent / mc:Choice / mc:Fallback). */
	const MC_NS = 'http://schemas.openxmlformats.org/markup-compatibility/2006';

	/**
	 * Reads a .docx file and converts it to a minimal HTML string.
	 *
	 * Only paragraphs, tables and text-box text are kept; all styling is dropped.
	 * The result starts with a small <style> block for tables, followed by one
	 * <p> per paragraph and one <table> per table, in document order. All text
	 * taken from the document is HTML-escaped.
	 *
	 * by sdxjwkq
	 *
	 * @param string $file Path of the .docx file.
	 * @return string The generated HTML.
	 * @throws \RuntimeException If the archive cannot be opened, has no
	 *                           word/document.xml, or that part is not well-formed XML.
	 */
	public function docxToHtml($file) {
		$xml = $this->readDocumentXml($file);

		$dom = new \DOMDocument();
		// Collect libxml errors internally so a malformed part raises an exception
		// below instead of emitting PHP warnings.
		$previous = libxml_use_internal_errors(true);
		$loaded = $dom->loadXML($xml, LIBXML_NONET);
		libxml_clear_errors();
		libxml_use_internal_errors($previous);
		if (!$loaded || $dom->documentElement === null) {
			throw new \RuntimeException("word/document.xml in '" . $file . "' is not well-formed XML");
		}

		// Use whatever namespace the root element declares, so the element checks
		// below do not depend on the prefix or on one hard-coded namespace URI.
		$wNs = $dom->documentElement->namespaceURI;

		$docx = <<<HTML_ENTITIES
<style>
	table{
		background-color:#000;
	}
	table td{
		padding:5px 5px 5px 5px;
	}
	table tr{
		background-color:#fff;
	}
</style>
HTML_ENTITIES;

		return $docx . $this->renderBlocks($dom->documentElement, $wNs);
	}

	/**
	 * Extracts the raw XML of the main document part from the archive.
	 *
	 * @param string $file Path of the .docx file.
	 * @return string Contents of word/document.xml.
	 * @throws \RuntimeException If the archive or the part cannot be read.
	 */
	private function readDocumentXml($file) {
		$zip = new \ZipArchive();
		// open() returns true on success and an integer error code otherwise.
		if ($zip->open($file) !== true) {
			throw new \RuntimeException("Unable to open '" . $file . "' as a docx (zip) archive");
		}
		$xml = $zip->getFromName("word/document.xml");
		$zip->close();
		if ($xml === false || $xml === '') {
			throw new \RuntimeException("'" . $file . "' does not contain word/document.xml");
		}
		return $xml;
	}

	/**
	 * Renders every paragraph and table found below $parent, in document order.
	 * Elements that are neither are descended into, so wrapped blocks are found too.
	 *
	 * @param \DOMNode    $parent Node whose children are scanned.
	 * @param string|null $wNs    Namespace URI of the document's root element.
	 * @return string HTML fragment.
	 */
	private function renderBlocks(\DOMNode $parent, $wNs) {
		$html = '';
		foreach ($parent->childNodes as $child) {
			if ($child->nodeType !== XML_ELEMENT_NODE) {
				continue;
			}
			if ($this->isWordElement($child, $wNs, 'p')) {
				$html .= "<p>" . $this->escape($this->collectText($child, $wNs)) . "</p>";
			} elseif ($this->isWordElement($child, $wNs, 'tbl')) {
				$html .= $this->renderTable($child, $wNs);
			} else {
				$html .= $this->renderBlocks($child, $wNs);
			}
		}
		return $html;
	}

	/**
	 * Renders one table element as an HTML table (one <tr> per row, one <td> per cell).
	 *
	 * @param \DOMNode    $table The table element.
	 * @param string|null $wNs   Namespace URI of the document's root element.
	 * @return string HTML fragment.
	 */
	private function renderTable(\DOMNode $table, $wNs) {
		$html = "<table border='0' cellspacing='1' cellpadding='0'>";
		foreach ($table->childNodes as $row) {
			if (!$this->isWordElement($row, $wNs, 'tr')) {
				continue;
			}
			$html .= "<tr>";
			foreach ($row->childNodes as $cell) {
				if ($this->isWordElement($cell, $wNs, 'tc')) {
					$html .= $this->renderCell($cell, $wNs);
				}
			}
			$html .= "</tr>";
		}
		return $html . "</table>";
	}

	/**
	 * Renders one table cell. A cell with a single paragraph yields <td>text</td>;
	 * several paragraphs (or nested tables) are separated by <br>.
	 *
	 * @param \DOMNode    $cell The cell element.
	 * @param string|null $wNs  Namespace URI of the document's root element.
	 * @return string HTML fragment.
	 */
	private function renderCell(\DOMNode $cell, $wNs) {
		$parts = array();
		foreach ($cell->childNodes as $child) {
			if ($this->isWordElement($child, $wNs, 'p')) {
				$parts[] = $this->escape($this->collectText($child, $wNs));
			} elseif ($this->isWordElement($child, $wNs, 'tbl')) {
				$parts[] = $this->renderTable($child, $wNs);
			}
		}
		return "<td>" . implode("<br>", $parts) . "</td>";
	}

	/**
	 * Concatenates the content of every text ("t") element below $node.
	 * This includes text nested in text boxes anchored inside the node.
	 *
	 * @param \DOMNode    $node Node to scan.
	 * @param string|null $wNs  Namespace URI of the document's root element.
	 * @return string Plain (unescaped) text.
	 */
	private function collectText(\DOMNode $node, $wNs) {
		$text = '';
		foreach ($node->childNodes as $child) {
			if ($child->nodeType !== XML_ELEMENT_NODE) {
				continue;
			}
			// A Fallback branch is skipped so that text which also appears in the
			// sibling Choice branch is not emitted twice.
			if ($child->localName === 'Fallback' && $child->namespaceURI === self::MC_NS) {
				continue;
			}
			if ($this->isWordElement($child, $wNs, 't')) {
				$text .= $child->textContent;
				continue;
			}
			$text .= $this->collectText($child, $wNs);
		}
		return $text;
	}

	/**
	 * Tells whether $node is an element with the given local name in the document namespace.
	 *
	 * @param \DOMNode    $node      Node to test.
	 * @param string|null $wNs       Namespace URI of the document's root element.
	 * @param string      $localName Expected local name, e.g. "p" or "tbl".
	 * @return bool
	 */
	private function isWordElement(\DOMNode $node, $wNs, $localName) {
		return $node->nodeType === XML_ELEMENT_NODE
			&& $node->namespaceURI === $wNs
			&& $node->localName === $localName;
	}

	/**
	 * Escapes document text for safe inclusion in HTML element content.
	 *
	 * @param string $text Plain text.
	 * @return string Escaped text.
	 */
	private function escape($text) {
		return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
	}
}

 

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值