php解析mht,php解析mht文件转换成html的实例

/**
 * Parser for files in MHT (MHTML web archive) format.
 *
 * Usage example:
 *
 * function mhtmlParseBody($filename) {
 *     if (file_exists ( $filename )) {
 *         if (is_dir ( $filename )) return false;
 *         $filename = strtolower ( $filename );
 *         if (strpos ( $filename, '.mht', 1 ) == FALSE) return false;
 *         $o_mhtml = new mhtparse ();
 *         $o_mhtml->set_file ( $filename );
 *         $o_mhtml->extract ();
 *         return $o_mhtml->get_part_to_file(0);
 *     }
 *     return null;
 * }
 *
 * function mhtmlParseAll($filename) {
 *     if (file_exists ( $filename )) {
 *         if (is_dir ( $filename )) return false;
 *         $filename = strtolower ( $filename );
 *         if (strpos ( $filename, '.mht', 1 ) == FALSE) return false;
 *         $o_mhtml = new mhtparse ();
 *         $o_mhtml->set_file ( $filename );
 *         $o_mhtml->extract ();
 *         return $o_mhtml->get_all_part_file();
 *     }
 *     return null;
 * }
 */

/**
 * Splits an MHT (MHTML / multipart MIME) file into its parts and decodes them.
 *
 * Typical use: set_file(), then extract(), then get_part_to_file($i) for a
 * decoded part or get_all_part_file() for the raw parts.
 */
class mhtparse
{
    /** @var string Path of the file to read (set through set_file()). */
    public $file = '';

    /** @var string Multipart delimiter including its leading "--", or '' when none was found. */
    public $boundary = '';

    /** @var string|array Raw file contents after read_filedata(); list of raw parts after file_parts(). */
    public $filedata = '';

    /** @var int Number of entries in $filedata once it has been split. */
    public $countparts = 1;

    /** @var string Accumulated diagnostic messages, one per line. */
    public $log = '';

    /**
     * Reads the configured file and splits it into parts.
     *
     * @return int 1 when the file was read, 0 when it could not be read
     *             (a message is then available through get_log()).
     */
    public function extract()
    {
        $was_read = $this->read_filedata();
        // Always split, so $filedata is an array afterwards even on failure.
        $this->file_parts();

        return $was_read ? 1 : 0;
    }

    /**
     * Sets the path of the file to parse.
     *
     * @param string $p File path.
     */
    public function set_file($p)
    {
        $this->file = $p;
    }

    /**
     * Returns the diagnostic messages collected so far.
     *
     * @return string
     */
    public function get_log()
    {
        return $this->log;
    }

    /**
     * Finds the multipart boundary in the first 8192 bytes of the data and
     * splits $filedata into an array of raw parts (headers + body each).
     *
     * The text before the first delimiter is discarded. When no boundary is
     * found, the whole data becomes the single part.
     */
    public function file_parts()
    {
        if (!is_string($this->filedata)) {
            // Already split (or never loaded as text): nothing to do.
            return;
        }

        $head_lines = explode("\n", substr($this->filedata, 0, 8192));
        foreach ($head_lines as $line) {
            $line = trim($line);
            if ($line === '' || stripos($line, 'boundary') === false) {
                continue;
            }
            // Match the boundary parameter itself (quoted or bare token) rather
            // than the first quoted string on the line, so that a header such as
            // `type="text/html"; boundary="..."` on one line is handled.
            if (preg_match('/boundary\s*=\s*(?:"([^"]+)"|([^\s;"]+))/i', $line, $found)) {
                $value = ($found[1] !== '') ? $found[1] : $found[2];
                $this->boundary = '--' . $value;
                // Drop the declaring header line from the data, as the original did.
                $this->filedata = str_replace($line, '', $this->filedata);
                break;
            }
        }

        if ($this->boundary != '') {
            $pieces = explode($this->boundary, $this->filedata);
            unset($pieces[0]); // preamble before the first delimiter
            $this->filedata = array_values($pieces);
            $this->countparts = count($this->filedata);
        } else {
            $this->filedata = array($this->filedata);
            $this->countparts = 1;
        }
    }

    /**
     * Returns the raw (undecoded) parts.
     *
     * @return string|array Array of parts after extract(); the raw value of
     *                      $filedata otherwise.
     */
    public function get_all_part_file()
    {
        return $this->filedata;
    }

    /**
     * Returns the body of part $i, decoded according to its
     * Content-Transfer-Encoding header (base64 or quoted-printable; any other
     * encoding is returned as is).
     *
     * @param int $i Zero-based part index.
     *
     * @return string|int Decoded body; '' when the part does not exist; 1 when
     *                    the part is only the "--" left over from the closing
     *                    delimiter.
     */
    public function get_part_to_file($i)
    {
        if (!is_array($this->filedata) || !isset($this->filedata[$i])) {
            return '';
        }

        $part_lines = explode("\n", ltrim($this->filedata[$i]));
        $encoding = '';
        $line_data_start = 0;

        // Headers run up to the first blank line.
        foreach ($part_lines as $line_id => $line) {
            $line = trim($line);
            if ($line === '') {
                if (trim($part_lines[0]) === '--') {
                    return 1;
                }
                $line_data_start = $line_id;
                break;
            }
            $pos = strpos($line, ':');
            if ($pos !== false) {
                $k = strtolower(trim(substr($line, 0, $pos)));
                if ($k === 'content-transfer-encoding') {
                    // Encoding names are case-insensitive ("Base64", "Quoted-Printable").
                    $encoding = strtolower(trim(substr($line, $pos + 1)));
                }
            }
        }

        // Keep the line breaks: they are part of text bodies, and quoted-printable
        // soft line breaks ("=" at end of line) need them to be recognised.
        $body = implode("\n", array_slice($part_lines, $line_data_start + 1));

        if ($encoding === 'base64') {
            return base64_decode($body);
        }
        if ($encoding === 'quoted-printable') {
            // Core function; does not require the optional IMAP extension.
            return quoted_printable_decode($body);
        }

        return $body;
    }

    /**
     * Loads the whole file named by $file into $filedata.
     *
     * On failure $filedata is set to '' and a message is appended to $log.
     *
     * @return bool True when the file was read.
     */
    public function read_filedata()
    {
        $contents = false;
        if (is_string($this->file) && is_file($this->file) && is_readable($this->file)) {
            $contents = file_get_contents($this->file);
        }

        if ($contents === false) {
            $this->filedata = '';
            $this->log .= 'Unable to read file: ' . $this->file . "\n";

            return false;
        }

        $this->filedata = $contents;

        return true;
    }

    /**
     * Extracts the text between a begin mark and its matching end mark,
     * extending the range while further begin marks occur before the end
     * mark found so far.
     *
     * Example: for "sssss { x { xx } {xx{xx } x} x} sssss" with '{' and '}'
     * the range is " x { xx } {xx{xx } x} x".
     *
     * Based on getRange() v1.1 (2004-2006) by Uku-Kaarel Joesaar.
     *
     * @param string $subject       Text to search (taken by reference, not modified).
     * @param string $Beginmark_str Opening mark; '{' when empty.
     * @param string $Endmark_str   Closing mark; '}' when empty.
     * @param int    $Start_pos     Offset at which the search starts.
     *
     * @return array|false array('range' => text between the marks,
     *                     'begin' => offset just after the first begin mark,
     *                     'end' => offset just after the final end mark),
     *                     or false when no complete range is found.
     */
    public function getrange(&$subject, $Beginmark_str = '{', $Endmark_str = '}', $Start_pos = 0)
    {
        if (empty($Beginmark_str)) {
            $Beginmark_str = '{';
        }
        $Beginmark_str_len = strlen($Beginmark_str);

        if (empty($Endmark_str)) {
            $Endmark_str = '}';
        }
        $Endmark_str_len = strlen($Endmark_str);

        $text = (string) $subject;
        $Start_pos = (int) $Start_pos;
        if (abs($Start_pos) > strlen($text)) {
            // strpos() rejects such offsets (ValueError on PHP 8); report "not found".
            return false;
        }

        $Begin_firstOccurence_pos = null; // offset just after the first begin mark
        $Start_pos_cache = $Start_pos;    // where the next begin-mark search starts
        $End_pos_cache = null;            // offset of the last end mark found
        $End_sequel_pos = null;           // offset just after the last end mark found
        $range_end_pos = null;            // end of the range accepted so far
        $range_current_lenght = null;     // length of the range accepted so far
        $rangeClean = 0;                  // 1 once no begin mark lies inside the range

        do {
            $Start_pos_cache = strpos($text, $Beginmark_str, $Start_pos_cache);

            if (is_int($Start_pos_cache)) {
                // Skip over the mark itself.
                $Start_pos_cache += $Beginmark_str_len;

                if (is_int($Begin_firstOccurence_pos)) {
                    // A further begin mark: inside the current range means the
                    // range must grow; beyond it means the range is complete.
                    if ($Start_pos_cache < $range_end_pos) {
                        $rangeClean = 0;
                    } elseif ($Start_pos_cache > $range_end_pos) {
                        $rangeClean = 1;
                    }
                } else {
                    $Begin_firstOccurence_pos = $Start_pos_cache;
                }
            } elseif (is_int($Begin_firstOccurence_pos)) {
                // No more begin marks: the range found so far is final.
                $rangeClean = 1;
            } else {
                // Error 0/1: no begin mark at all.
                return false;
            }

            if ($rangeClean != 1) {
                if (!is_int($End_pos_cache)) {
                    $End_sequel_pos = $Begin_firstOccurence_pos;
                }
                $End_pos_cache = strpos($text, $Endmark_str, $End_sequel_pos);

                if (!is_int($End_pos_cache)) {
                    // Error 2: no (further) end mark.
                    return false;
                }

                $range_current_lenght = $End_pos_cache - $Begin_firstOccurence_pos;
                $End_sequel_pos = $End_pos_cache + $Endmark_str_len;
                $range_end_pos = $End_pos_cache;
            }
        } while ($rangeClean < 1);

        if (!is_int($Begin_firstOccurence_pos) || !is_int($range_current_lenght)) {
            return false;
        }

        return array(
            'range' => substr($text, $Begin_firstOccurence_pos, $range_current_lenght),
            'begin' => $Begin_firstOccurence_pos,
            'end'   => $End_sequel_pos
        );
    } // end getrange()
} // class

?>

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值