网页内容写入word文档

最新推荐文章于 2024-02-01 18:36:30 发布

暗影帝皇天

最新推荐文章于 2024-02-01 18:36:30 发布

阅读量990

点赞数

分类专栏： php

php 专栏收录该内容

17 篇文章 0 订阅

订阅专栏

 
 /**
  * Build a Word-openable document from an HTML string.
  *
  * The document is really an MHT (MIME HTML) archive: the HTML is stored as the
  * part "tmp.html", and every image referenced by an <img> tag is fetched and
  * embedded as an additional part named after its original src value.
  *
  * This function depends on the Wordmaker class, loaded via import("@.Util.Wordmaker").
  *
  * Limitation: the src attribute value must be wrapped in single or double
  * quotes; unquoted src values are not detected.
  *
  * @param string $content      HTML content.
  * @param string $absolutePath Base URL of the page. Prepended to every image path that is
  *                             not already an http(s) URL; it must end with "/".
  * @param bool   $isEraseLink  Whether to strip <a> tags from the HTML (their inner content is kept).
  *
  * @return mixed The value returned by Wordmaker::GetFile() (presumably the raw
  *               document bytes -- confirm against the Wordmaker class).
  */
 function WordMake($content, $absolutePath = "", $isEraseLink = true)
 {
     import("@.Util.Wordmaker");
     $mht = new Wordmaker();

     if ($isEraseLink) {
         // \b keeps tags such as <abbr>/<article> from being mistaken for <a>;
         // the "s" flag lets an anchor span several lines.
         $stripped = preg_replace('/<a\b[^>]*>(.*?)<\/a>/is', '$1', $content);
         if ($stripped !== null) {
             // preg_replace() yields null on a PCRE error; keep the input in that case.
             $content = $stripped;
         }
     }

     // Map of "src as written in the HTML" => "location to download from".
     // Keying by src embeds an image that occurs several times only once.
     $images  = array();
     $matches = array();

     // Accepts any attributes before src and both "<img ...>" and "<img .../>".
     // The back-reference \1 forces the closing quote to match the opening one.
     if (preg_match_all('/<img\b[^>]*?\ssrc\s*=\s*(["\'])(.*?)\1/is', $content, $matches)) {
         foreach ($matches[2] as $src) {
             $src = trim($src);
             if ($src === "" || isset($images[$src])) {
                 continue;
             }
             if (preg_match('#^https?://#i', $src)) {
                 // Already an absolute link: no prefix needed.
                 $images[$src] = $src;
             } else {
                 $images[$src] = $absolutePath . $src;
             }
         }
     }

     $mht->AddContents("tmp.html", $mht->GetMimeType("tmp.html"), $content);

     foreach ($images as $src => $location) {
         // A single read replaces the former fopen() probe, whose handle was never
         // closed and which caused every remote image to be requested twice.
         $imgcontent = @file_get_contents($location);

         if ($imgcontent === false) {
             // The location originates from the HTML input, so escape it before echoing.
             echo "file:" . htmlspecialchars($location) . " not exist!<br />";
             continue;
         }

         if ($imgcontent !== "") {
             // Array keys that look like integers are cast by PHP; restore the string form.
             $mht->AddContents((string) $src, $mht->GetMimeType($location), $imgcontent);
         }
     }

     return $mht->GetFile();
 }
 
 
 
 
 
 
 
 
 // Calling code: render one database record through the "word" template and
 // save the result as a .doc file. It uses $this, so it is meant to run inside
 // a controller method. $_path is not defined in this snippet -- it is presumably
 // the output directory set by the surrounding code (TODO confirm).

 // Fetch one record from the database.
 $data = $this->db->where(array('id' => 1))->find();

 // Hand the record to the template so its variables get substituted.
 $this->assign($data);

 // Render the template named "word" and capture the resulting HTML.
 $content = $this->fetch('word');

 // Generate the Word (MHT) document content.
 $fileContent = WordMake($content);

 // Replace characters that are path separators or invalid in file names.
 // This must happen BEFORE the GBK conversion: in UTF-8 these ASCII bytes never
 // occur inside a multi-byte character, whereas GBK trail bytes can equal "\" or "|".
 $unsafeChars = array('/', '\\', ':', '*', '?', '"', '<', '>', '|', "\0");
 $username    = str_replace($unsafeChars, '_', (string) $data['username']);
 $number      = str_replace($unsafeChars, '_', (string) $data['number']);

 // Convert the file name from UTF-8 to GBK.
 $name = iconv("utf-8", "GBK", $username);
 if ($name === false) {
     // iconv() returns false when the name cannot be converted; fall back to the UTF-8 form.
     $name = $username;
 }

 // Open the target document; fopen() itself raises a warning when it fails.
 $fp = fopen($_path . $name . '[' . $number . "].doc", 'w');
 if ($fp !== false) {
     // Write the document and close the handle.
     if (fwrite($fp, $fileContent) === false) {
         trigger_error("Failed to write the generated Word document", E_USER_WARNING);
     }
     fclose($fp);
 }