PHP采集程序中常用的函数

最新推荐文章于 2024-06-25 09:25:49 发布
Lankecms
最新推荐文章于 2024-06-25 09:25:49 发布
阅读量396
点赞数
分类专栏： PHP
PHP 专栏收录该内容
141 篇文章 0 订阅
订阅专栏
函数描述及例子：PHP采集程序中常用的函数。查询关键字：PHP采集程序中常用的函数
 
    001. 
    /**
     * Return the URL (path plus query string) of the currently running script.
     *
     * Uses $_SERVER['REQUEST_URI'] when it is non-empty; otherwise falls back
     * to $_SERVER['PHP_SELF'], appending "?" and $_SERVER['QUERY_STRING'] when
     * a query string is present.
     *
     * @return string
     */
    function get_php_url(){
        // The published listing called emptyempty(), which does not exist;
        // the intended function is empty().
        if (!empty($_SERVER["REQUEST_URI"])) {
            return $_SERVER["REQUEST_URI"];
        }

        $scriptName = isset($_SERVER["PHP_SELF"]) ? $_SERVER["PHP_SELF"] : "";
        if (empty($_SERVER["QUERY_STRING"])) {
            return $scriptName;
        }
        return $scriptName . "?" . $_SERVER["QUERY_STRING"];
    }
   
    013. 
    /**
     * Convert full-width digits to ASCII digits and reduce the value to a
     * plain number string.
     *
     * After the digit conversion, every character other than 0-9 and "." is
     * removed, as is a run of zeros at the very start of the string.
     *
     * @param string $fnum Text that may contain full-width digits.
     * @return string|int The cleaned digit string, or integer 0 when nothing is left.
     */
    function GetAlabNum($fnum){
        $fullWidth = array("０","１","２","３","４","５","６","７","８","９");
        $halfWidth = array("0","1","2","3","4","5","6","7","8","9");
        $fnum = str_replace($fullWidth, $halfWidth, $fnum);

        // ereg_replace() was removed in PHP 7; this is the same expression in PCRE form.
        $fnum = preg_replace('/[^0-9.]|^0+/', '', $fnum);

        if ($fnum === '' || $fnum === null) {
            $fnum = 0;
        }
        return $fnum;
    }
   
    022. 
    /**
     * Convert plain text into HTML-safe text.
     *
     * - Each pair of consecutive spaces becomes one ideographic space (U+3000).
     * - "<" and ">" are escaped to "&lt;" and "&gt;".
     * - Each line break (CRLF, CR or LF) becomes "<br />" followed by CRLF.
     *
     * The entity and <br /> literals were lost when the listing was published
     * (the replacements had become no-ops); they are restored here. "&" is
     * deliberately left untouched, as in the original.
     *
     * @param string $txt Plain text.
     * @return string
     */
    function Text2Html($txt){
        $txt = str_replace("  ", "\xE3\x80\x80", $txt);
        $txt = str_replace("<", "&lt;", $txt);
        $txt = str_replace(">", "&gt;", $txt);
        // Treat CRLF as a single line ending so it yields one break, not two.
        $txt = preg_replace("/\r\n|\r|\n/", "<br />\r\n", $txt);
        return $txt;
    }
   
    031. 
    /**
     * Neutralise HTML markup by escaping angle brackets.
     *
     * "<" becomes "&lt;" and ">" becomes "&gt;". The entity literals were lost
     * when the listing was published, which had turned both replacements into
     * no-ops; they are restored here.
     *
     * @param string $str Text that may contain HTML tags.
     * @return string
     */
    function ClearHtml($str){
        return str_replace(array('<', '>'), array('&lt;', '&gt;'), $str);
    }
   
    037. 
    /**
     * Rewrite root-relative href/src attributes in $content to absolute URLs.
     *
     * The scheme and host are taken from $feed_url (http, https or ftp). An
     * attribute such as href="/a.html" becomes href="http://host/a.html".
     * Protocol-relative URLs ("//cdn.example/...") are left untouched, and
     * both double- and single-quoted attribute values are handled.
     *
     * @param string $content  HTML fragment to rewrite.
     * @param string $feed_url URL the fragment was fetched from.
     * @return string $content unchanged when no scheme/host can be extracted.
     */
    function relative_to_absolute($content, $feed_url) {
        // Capture "scheme://host" only; stop at path, query or fragment.
        if (!preg_match('~^\s*((?:https?|ftp)://[^/?#\s]+)~i', $feed_url, $m)) {
            return $content;
        }

        // Escape "\" and "$" so the base is taken literally in the replacement.
        $base = addcslashes($m[1], '\\$');

        // (?!/) keeps protocol-relative URLs from being prefixed.
        return preg_replace(
            '~(href|src)=(["\'])/(?!/)~',
            '${1}=${2}' . $base . '/',
            $content
        );
    }
   
    053. 
    /**
     * Extract every simple text link from an HTML fragment.
     *
     * Matches <a ...href=...>text</a> where the link text contains no nested
     * tag. The href may be double-quoted, single-quoted or unquoted, and may
     * be preceded by other attributes.
     *
     * @param string $code HTML source.
     * @return array array('name' => list of link texts, 'url' => list of hrefs),
     *               with matching indexes.
     */
    function get_all_url($code){
        // ["'] replaces the original ["|'] class, which also accepted a literal "|".
        preg_match_all(
            '/<a\s+[^>]*?\bhref\s*=\s*["\']?([^>"\' ]+)["\']?[^>]*>([^>]+)<\/a>/i',
            $code,
            $arr
        );
        return array('name' => $arr[2], 'url' => $arr[1]);
    }
   
    058. 
    /**
     * Return the text between the first $start marker and the next $end marker.
     *
     * @param string $str   Text to search.
     * @param string $start Opening marker.
     * @param string $end   Closing marker.
     * @return string|null  null when either marker is empty; '' when $start does
     *                      not occur in $str; otherwise the enclosed text.
     */
    function get_tag_data($str, $start, $end){
        if ($start == '' || $end == '') {
            return;
        }

        $afterStart = explode($start, $str);
        // Without this guard a missing $start read an undefined index.
        if (!isset($afterStart[1])) {
            return '';
        }

        $beforeEnd = explode($end, $afterStart[1]);
        return $beforeEnd[0];
    }
   
    067. 
    /**
     * Convert each row of an HTML table into one CSV-style string.
     *
     * Every cell is wrapped in double quotes and cells are joined with commas,
     * e.g. '"a","b"'. All remaining tags, line breaks, "&nbsp;" entities and
     * spaces are removed, including spaces inside cell text.
     *
     * The "</td>", "</tr>" and "&nbsp;" literals were lost when the listing was
     * published (leaving empty search strings); they are restored here.
     *
     * @param string $table HTML of the table.
     * @return array List of CSV-style row strings.
     */
    function get_tr_array($table) {
        $table = preg_replace("'<td[^>]*?>'si", '"', $table);
        // Case-insensitive, to agree with the /i opening-tag match above.
        $table = str_ireplace('</td>', '",', $table);
        $table = str_ireplace('</tr>', '{tr}', $table);

        // Strip every remaining HTML tag.
        $table = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $table);

        // Remove line breaks (with any whitespace after them), then spaces.
        $table = preg_replace("'[\r\n]\s*'", "", $table);
        $table = str_replace(array('&nbsp;', "\xC2\xA0", ' '), '', $table);

        $rows = explode(",{tr}", $table);
        // The last piece is whatever trails the final row marker.
        array_pop($rows);
        return $rows;
    }
   
    082. 
    /**
     * Convert an HTML table into a two-dimensional array of cell texts.
     *
     * All tags, line breaks, "&nbsp;" entities and spaces are removed,
     * including spaces inside cell text.
     *
     * The "</tr>", "</td>" and "&nbsp;" literals were lost when the listing was
     * published (leaving empty search strings); they are restored here.
     *
     * @param string $table HTML of the table.
     * @return array List of rows, each a list of cell strings; empty array
     *               when no complete row is found.
     */
    function get_td_array($table) {
        $table = preg_replace(
            array("'<table[^>]*?>'si", "'<tr[^>]*?>'si", "'<td[^>]*?>'si"),
            "",
            $table
        );
        // Case-insensitive, to agree with the /i opening-tag matches above.
        $table = str_ireplace('</tr>', '{tr}', $table);
        $table = str_ireplace('</td>', '{td}', $table);

        // Strip every remaining HTML tag.
        $table = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $table);

        // Remove line breaks (with any whitespace after them), then spaces.
        $table = preg_replace("'[\r\n]\s*'", "", $table);
        $table = str_replace(array('&nbsp;', "\xC2\xA0", ' '), '', $table);

        $rows = explode('{tr}', $table);
        array_pop($rows);

        // Initialised so that a table with no rows returns an empty array.
        $td_array = array();
        foreach ($rows as $tr) {
            $td = explode('{td}', $tr);
            array_pop($td);
            $td_array[] = $td;
        }
        return $td_array;
    }
   
    105. 
    /**
     * Collect every run of ASCII letters in a string, sorted ascending.
     *
     * @param string $str      Text to scan.
     * @param bool   $distinct When true (the default), duplicates are dropped.
     * @return array Sorted, re-indexed list of words.
     */
    function split_en_str($str,$distinct=true) {
        preg_match_all('/([a-zA-Z]+)/', $str, $found);
        $words = $distinct ? array_unique($found[1]) : $found[1];
        sort($words);
        return $words;
    }
   
    114. 
      
    115. 
    函数描述及例子 
   
    116. 
      
    117. 
    PHP采集程序中常用的函数 
   
    118. 
      
    119. 
    查询关键字 
   
    120. 
      
    121. 
    PHP采集程序中常用的函数 
   
    122. 
    <?php
   
    123. 
    /**
     * Return the URL (path plus query string) of the currently running script.
     *
     * Uses $_SERVER['REQUEST_URI'] when it is non-empty; otherwise falls back
     * to $_SERVER['PHP_SELF'], appending "?" and $_SERVER['QUERY_STRING'] when
     * a query string is present.
     *
     * @return string
     */
    function get_php_url(){
        // The published listing called emptyempty(), which does not exist;
        // the intended function is empty().
        if (!empty($_SERVER["REQUEST_URI"])) {
            return $_SERVER["REQUEST_URI"];
        }

        $scriptName = isset($_SERVER["PHP_SELF"]) ? $_SERVER["PHP_SELF"] : "";
        if (empty($_SERVER["QUERY_STRING"])) {
            return $scriptName;
        }
        return $scriptName . "?" . $_SERVER["QUERY_STRING"];
    }
   
    135. 
    /**
     * Convert full-width digits to ASCII digits and reduce the value to a
     * plain number string.
     *
     * After the digit conversion, every character other than 0-9 and "." is
     * removed, as is a run of zeros at the very start of the string.
     *
     * @param string $fnum Text that may contain full-width digits.
     * @return string|int The cleaned digit string, or integer 0 when nothing is left.
     */
    function GetAlabNum($fnum){
        $fullWidth = array("０","１","２","３","４","５","６","７","８","９");
        $halfWidth = array("0","1","2","3","4","5","6","7","8","9");
        $fnum = str_replace($fullWidth, $halfWidth, $fnum);

        // ereg_replace() was removed in PHP 7; this is the same expression in PCRE form.
        $fnum = preg_replace('/[^0-9.]|^0+/', '', $fnum);

        if ($fnum === '' || $fnum === null) {
            $fnum = 0;
        }
        return $fnum;
    }
   
    144. 
    /**
     * Convert plain text into HTML-safe text.
     *
     * - Each pair of consecutive spaces becomes one ideographic space (U+3000).
     * - "<" and ">" are escaped to "&lt;" and "&gt;".
     * - Each line break (CRLF, CR or LF) becomes "<br />" followed by CRLF.
     *
     * The entity and <br /> literals were lost when the listing was published
     * (the replacements had become no-ops or garbled); they are restored here.
     * "&" is deliberately left untouched, as in the original.
     *
     * @param string $txt Plain text.
     * @return string
     */
    function Text2Html($txt){
        $txt = str_replace("  ", "\xE3\x80\x80", $txt);
        $txt = str_replace("<", "&lt;", $txt);
        $txt = str_replace(">", "&gt;", $txt);
        // Treat CRLF as a single line ending so it yields one break, not two.
        $txt = preg_replace("/\r\n|\r|\n/", "<br />\r\n", $txt);
        return $txt;
    }
   
    153. 
    /**
     * Neutralise HTML markup by escaping angle brackets.
     *
     * "<" becomes "&lt;" and ">" becomes "&gt;". The entity literals were lost
     * when the listing was published, which had turned both replacements into
     * no-ops; they are restored here.
     *
     * @param string $str Text that may contain HTML tags.
     * @return string
     */
    function ClearHtml($str){
        return str_replace(array('<', '>'), array('&lt;', '&gt;'), $str);
    }
   
    159. 
    /**
     * Rewrite root-relative href/src attributes in $content to absolute URLs.
     *
     * The scheme and host are taken from $feed_url (http, https or ftp). An
     * attribute such as href="/a.html" becomes href="http://host/a.html".
     * Protocol-relative URLs ("//cdn.example/...") are left untouched, and
     * both double- and single-quoted attribute values are handled.
     *
     * @param string $content  HTML fragment to rewrite.
     * @param string $feed_url URL the fragment was fetched from.
     * @return string $content unchanged when no scheme/host can be extracted.
     */
    function relative_to_absolute($content, $feed_url) {
        // Capture "scheme://host" only; stop at path, query or fragment.
        if (!preg_match('~^\s*((?:https?|ftp)://[^/?#\s]+)~i', $feed_url, $m)) {
            return $content;
        }

        // Escape "\" and "$" so the base is taken literally in the replacement.
        $base = addcslashes($m[1], '\\$');

        // (?!/) keeps protocol-relative URLs from being prefixed.
        return preg_replace(
            '~(href|src)=(["\'])/(?!/)~',
            '${1}=${2}' . $base . '/',
            $content
        );
    }
   
    175. 
    /**
     * Extract every simple text link from an HTML fragment.
     *
     * Matches <a ...href=...>text</a> where the link text contains no nested
     * tag. The href may be double-quoted, single-quoted or unquoted, and may
     * be preceded by other attributes.
     *
     * @param string $code HTML source.
     * @return array array('name' => list of link texts, 'url' => list of hrefs),
     *               with matching indexes.
     */
    function get_all_url($code){
        // ["'] replaces the original ["|'] class, which also accepted a literal "|".
        preg_match_all(
            '/<a\s+[^>]*?\bhref\s*=\s*["\']?([^>"\' ]+)["\']?[^>]*>([^>]+)<\/a>/i',
            $code,
            $arr
        );
        return array('name' => $arr[2], 'url' => $arr[1]);
    }
   
    180. 
    /**
     * Return the text between the first $start marker and the next $end marker.
     *
     * @param string $str   Text to search.
     * @param string $start Opening marker.
     * @param string $end   Closing marker.
     * @return string|null  null when either marker is empty; '' when $start does
     *                      not occur in $str; otherwise the enclosed text.
     */
    function get_tag_data($str, $start, $end){
        if ($start == '' || $end == '') {
            return;
        }

        $afterStart = explode($start, $str);
        // Without this guard a missing $start read an undefined index.
        if (!isset($afterStart[1])) {
            return '';
        }

        $beforeEnd = explode($end, $afterStart[1]);
        return $beforeEnd[0];
    }
   
    189. 
    /**
     * Convert each row of an HTML table into one CSV-style string.
     *
     * Every cell is wrapped in double quotes and cells are joined with commas,
     * e.g. '"a","b"'. All remaining tags, line breaks, "&nbsp;" entities and
     * spaces are removed, including spaces inside cell text.
     *
     * The "</td>", "</tr>" and "&nbsp;" literals were lost when the listing was
     * published (leaving empty search strings); they are restored here.
     *
     * @param string $table HTML of the table.
     * @return array List of CSV-style row strings.
     */
    function get_tr_array($table) {
        $table = preg_replace("'<td[^>]*?>'si", '"', $table);
        // Case-insensitive, to agree with the /i opening-tag match above.
        $table = str_ireplace('</td>', '",', $table);
        $table = str_ireplace('</tr>', '{tr}', $table);

        // Strip every remaining HTML tag.
        $table = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $table);

        // Remove line breaks (with any whitespace after them), then spaces.
        $table = preg_replace("'[\r\n]\s*'", "", $table);
        $table = str_replace(array('&nbsp;', "\xC2\xA0", ' '), '', $table);

        $rows = explode(",{tr}", $table);
        // The last piece is whatever trails the final row marker.
        array_pop($rows);
        return $rows;
    }
   
    204. 
    /**
     * Convert an HTML table into a two-dimensional array of cell texts.
     *
     * All tags, line breaks, "&nbsp;" entities and spaces are removed,
     * including spaces inside cell text.
     *
     * The "</tr>", "</td>" and "&nbsp;" literals were lost when the listing was
     * published (leaving empty search strings); they are restored here.
     *
     * @param string $table HTML of the table.
     * @return array List of rows, each a list of cell strings; empty array
     *               when no complete row is found.
     */
    function get_td_array($table) {
        $table = preg_replace(
            array("'<table[^>]*?>'si", "'<tr[^>]*?>'si", "'<td[^>]*?>'si"),
            "",
            $table
        );
        // Case-insensitive, to agree with the /i opening-tag matches above.
        $table = str_ireplace('</tr>', '{tr}', $table);
        $table = str_ireplace('</td>', '{td}', $table);

        // Strip every remaining HTML tag.
        $table = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $table);

        // Remove line breaks (with any whitespace after them), then spaces.
        $table = preg_replace("'[\r\n]\s*'", "", $table);
        $table = str_replace(array('&nbsp;', "\xC2\xA0", ' '), '', $table);

        $rows = explode('{tr}', $table);
        array_pop($rows);

        // Initialised so that a table with no rows returns an empty array.
        $td_array = array();
        foreach ($rows as $tr) {
            $td = explode('{td}', $tr);
            array_pop($td);
            $td_array[] = $td;
        }
        return $td_array;
    }
   
    227. 
    /**
     * Collect every run of ASCII letters in a string, sorted ascending.
     *
     * @param string $str      Text to scan.
     * @param bool   $distinct When true (the default), duplicates are dropped.
     * @return array Sorted, re-indexed list of words.
     */
    function split_en_str($str,$distinct=true) {
        preg_match_all('/([a-zA-Z]+)/', $str, $found);
        $words = $distinct ? array_unique($found[1]) : $found[1];
        sort($words);
        return $words;
    }
   
    236. 
      
    237. 
    ?>