函数描述及例子 PHP采集程序中常用的函数 查询关键字 PHP采集程序中常用的函数
/**
 * Return the URL (path plus query string) of the currently running script.
 *
 * Uses $_SERVER["REQUEST_URI"] when it is set and non-empty. Otherwise falls
 * back to $_SERVER["PHP_SELF"], appending "?" and $_SERVER["QUERY_STRING"]
 * when a query string is present.
 *
 * @return string The current script URL; an empty string if neither server
 *                variable is available.
 */
function get_php_url(){
    if (!empty($_SERVER["REQUEST_URI"])) {
        return $_SERVER["REQUEST_URI"];
    }

    // Guard the read: PHP_SELF may be absent in unusual SAPIs.
    $scriptName = isset($_SERVER["PHP_SELF"]) ? $_SERVER["PHP_SELF"] : "";

    if (empty($_SERVER["QUERY_STRING"])) {
        return $scriptName;
    }
    return $scriptName . "?" . $_SERVER["QUERY_STRING"];
}
/**
 * Convert full-width digits to half-width (ASCII) digits and reduce the
 * input to a bare number string.
 *
 * After the digit conversion every character other than 0-9 and "." is
 * removed, and a run of zeros at the very start of the string is stripped.
 *
 * @param string $fnum Text that may contain full-width digits.
 * @return string|int The cleaned number string, or integer 0 when nothing
 *                    is left.
 */
function GetAlabNum($fnum){
    // The published listing showed ASCII digits on both sides, which made the
    // conversion a no-op; the full-width forms are restored here.
    $fullWidth = array("０", "１", "２", "３", "４", "５", "６", "７", "８", "９");
    $halfWidth = array("0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
    $fnum = str_replace($fullWidth, $halfWidth, $fnum);

    // preg_replace stands in for ereg_replace, which no longer exists in PHP 7+.
    $fnum = preg_replace('/[^0-9.]|^0+/', "", $fnum);

    if ($fnum == "") {
        $fnum = 0;
    }
    return $fnum;
}
/**
 * Convert plain text into an HTML fragment.
 *
 * Escapes "<" and ">" as entities and turns every run of CR/LF characters
 * into a single "<br/>" followed by "\r\n".
 *
 * @param string $txt Plain text.
 * @return string Text that is safe to embed in HTML, with line breaks marked up.
 */
function Text2Html($txt){
    // NOTE(review): the published listing shows a space-for-space replacement
    // here, which does nothing. Reconstructed as "two ASCII spaces -> one
    // full-width space (U+3000)" so runs of spaces survive HTML rendering;
    // confirm against the intended source.
    $txt = str_replace("  ", "　", $txt);

    // Escape the angle brackets before any markup of our own is inserted.
    $txt = str_replace(array("<", ">"), array("&lt;", "&gt;"), $txt);

    // Greedy match: "\r\n" is one line break, not two (the original "U"
    // modifier matched each character separately).
    $txt = preg_replace('/[\r\n]+/', "<br/>\r\n", $txt);

    return $txt;
}
/**
 * Neutralise HTML markup by escaping the angle brackets.
 *
 * Only "<" and ">" are rewritten (to "&lt;" and "&gt;"); ampersands and
 * quotes are left untouched.
 *
 * @param string $str Text that may contain HTML tags.
 * @return string The text with "<" and ">" replaced by entities.
 */
function ClearHtml($str){
    // The published listing had the entities decoded back into "<" and ">",
    // which turned both replacements into no-ops.
    return str_replace(array('<', '>'), array('&lt;', '&gt;'), $str);
}
/**
 * Rewrite root-relative href/src attributes into absolute URLs.
 *
 * The scheme ("http://", "https://" or "ftp://") and host are taken from
 * $feed_url. Every double-quoted href="/..." or src="/..." in $content is
 * then prefixed with that scheme and host. Protocol-relative URLs
 * ("//host/...") are already absolute and are left untouched.
 *
 * @param string $content  HTML whose links should be rewritten.
 * @param string $feed_url URL of the page the HTML came from.
 * @return string The rewritten HTML, or $content unchanged when no scheme or
 *                host can be extracted from $feed_url.
 */
function relative_to_absolute($content, $feed_url) {
    preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol);

    // Drop the scheme, then everything from the first "/" on, leaving the host.
    $server_url = preg_replace("/(http|https|ftp|news):\/\//", "", $feed_url);
    $server_url = preg_replace("/\/.*/", "", $server_url);

    if ($server_url == '' || !isset($protocol[0])) {
        return $content;
    }

    $base = $protocol[0] . $server_url . '/';

    // A callback keeps "$" or "\" in the host from being read as a
    // back-reference. The (?!\/) look-ahead skips protocol-relative URLs.
    return preg_replace_callback(
        '/(href|src)="\/(?!\/)/',
        function ($m) use ($base) {
            return $m[1] . '="' . $base;
        },
        $content
    );
}
/**
 * Collect the anchors found in an HTML string.
 *
 * Only anchors whose first attribute is href and whose content contains no
 * ">" (i.e. no nested tags) are matched. The href value may be double-quoted,
 * single-quoted or unquoted.
 *
 * @param string $code HTML source.
 * @return array Two parallel lists: 'name' holds the link texts and 'url'
 *               holds the href values.
 */
function get_all_url($code){
    // The quote classes are ["\'] rather than ["|\']: inside a character
    // class "|" is a literal, so the old pattern accepted it as a quote.
    preg_match_all(
        '/<a\s+href=["\']?([^>"\' ]+)["\']?\s*[^>]*>([^>]+)<\/a>/i',
        $code,
        $arr
    );
    return array('name' => $arr[2], 'url' => $arr[1]);
}
/**
 * Return the text found between two delimiter strings.
 *
 * The result is the part of $str that follows the first occurrence of $start,
 * cut at whichever comes first: the next $start, the next $end, or the end of
 * the string.
 *
 * @param string $str   Text to search.
 * @param string $start Opening delimiter.
 * @param string $end   Closing delimiter.
 * @return string|null The enclosed text; '' when $start does not occur;
 *                     null when either delimiter is empty.
 */
function get_tag_data($str, $start, $end){
    if ($start == '' || $end == '') {
        return;
    }

    $parts = explode($start, $str);
    // Without this guard a missing $start read an undefined offset.
    if (!isset($parts[1])) {
        return '';
    }

    $inner = explode($end, $parts[1]);
    return $inner[0];
}
/**
 * Turn each row of an HTML table into one CSV-style line.
 *
 * Every cell is wrapped in double quotes and cells are joined with commas.
 * All remaining tags, line breaks and spaces are removed. Quotes inside a
 * cell are not escaped.
 *
 * @param string $table HTML of the table.
 * @return array One string per table row, e.g. '"a","b"'.
 */
function get_tr_array($table) {
    $table = preg_replace("'<td[^>]*?>'si", '"', $table);
    // The closing-tag search strings were lost in the published listing
    // (they appeared as ""), so no separators were ever produced.
    $table = str_replace("</td>", '",', $table);
    $table = str_replace("</tr>", "{tr}", $table);

    // Strip whatever HTML tags are left.
    $table = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $table);

    // Remove line breaks and the whitespace that follows them. A lone "\n"
    // is removed too, so Unix line endings do not leak into the rows.
    $table = preg_replace('/[\r\n]\s*/', "", $table);
    // NOTE(review): the listing shows two identical space removals; the
    // second is presumed to have been the "&nbsp;" entity -- confirm.
    $table = str_replace(array(" ", "&nbsp;"), "", $table);

    $table = explode(",{tr}", $table);
    array_pop($table); // drop the remainder after the final row marker
    return $table;
}
/**
 * Convert an HTML table into a two-dimensional array of cell texts.
 *
 * All tags, line breaks and spaces are removed from the cell contents.
 *
 * @param string $table HTML of the table.
 * @return array A list of rows, each a list of cell strings; an empty array
 *               when the input contains no rows.
 */
function get_td_array($table) {
    $table = preg_replace("'<table[^>]*?>'si", "", $table);
    $table = preg_replace("'<tr[^>]*?>'si", "", $table);
    $table = preg_replace("'<td[^>]*?>'si", "", $table);
    // The closing-tag search strings were lost in the published listing
    // (they appeared as ""), so rows and cells were never separated.
    $table = str_replace("</tr>", "{tr}", $table);
    $table = str_replace("</td>", "{td}", $table);

    // Strip whatever HTML tags are left.
    $table = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $table);

    // Remove line breaks and the whitespace that follows them. A lone "\n"
    // is removed too, so Unix line endings do not leak into the cells.
    $table = preg_replace('/[\r\n]\s*/', "", $table);
    // NOTE(review): the listing shows two identical space removals; the
    // second is presumed to have been the "&nbsp;" entity -- confirm.
    $table = str_replace(array(" ", "&nbsp;"), "", $table);

    $rows = explode('{tr}', $table);
    array_pop($rows); // drop the remainder after the final row marker

    // Initialised up front so a table without rows yields an empty array
    // rather than an undefined variable.
    $td_array = array();
    foreach ($rows as $tr) {
        $td = explode('{td}', $tr);
        array_pop($td); // drop the empty piece after the final cell marker
        $td_array[] = $td;
    }
    return $td_array;
}
/**
 * Extract the English words (runs of A-Z / a-z) from a string, sorted.
 *
 * @param string $str      Text to scan.
 * @param bool   $distinct When true (the default), duplicate words are
 *                         removed before sorting.
 * @return array Sorted, zero-indexed list of the words found.
 */
function split_en_str($str, $distinct = true) {
    preg_match_all('/([a-zA-Z]+)/', $str, $match);
    $words = $match[1];

    if ($distinct == true) {
        $words = array_unique($words);
    }

    // sort() also re-indexes, closing the gaps array_unique() leaves behind.
    sort($words);
    return $words;
}
114.
115.
函数描述及例子
116.
117.
PHP采集程序中常用的函数
118.
119.
查询关键字
120.
121.
PHP采集程序中常用的函数
122.
<?php
/**
 * Return the URL (path plus query string) of the currently running script.
 *
 * Uses $_SERVER["REQUEST_URI"] when it is set and non-empty. Otherwise falls
 * back to $_SERVER["PHP_SELF"], appending "?" and $_SERVER["QUERY_STRING"]
 * when a query string is present.
 *
 * @return string The current script URL; an empty string if neither server
 *                variable is available.
 */
function get_php_url(){
    if (!empty($_SERVER["REQUEST_URI"])) {
        return $_SERVER["REQUEST_URI"];
    }

    // Guard the read: PHP_SELF may be absent in unusual SAPIs.
    $scriptName = isset($_SERVER["PHP_SELF"]) ? $_SERVER["PHP_SELF"] : "";

    if (empty($_SERVER["QUERY_STRING"])) {
        return $scriptName;
    }
    return $scriptName . "?" . $_SERVER["QUERY_STRING"];
}
/**
 * Convert full-width digits to half-width (ASCII) digits and reduce the
 * input to a bare number string.
 *
 * After the digit conversion every character other than 0-9 and "." is
 * removed, and a run of zeros at the very start of the string is stripped.
 *
 * @param string $fnum Text that may contain full-width digits.
 * @return string|int The cleaned number string, or integer 0 when nothing
 *                    is left.
 */
function GetAlabNum($fnum){
    // The published listing showed ASCII digits on both sides, which made the
    // conversion a no-op; the full-width forms are restored here.
    $fullWidth = array("０", "１", "２", "３", "４", "５", "６", "７", "８", "９");
    $halfWidth = array("0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
    $fnum = str_replace($fullWidth, $halfWidth, $fnum);

    // preg_replace stands in for ereg_replace, which no longer exists in PHP 7+.
    $fnum = preg_replace('/[^0-9.]|^0+/', "", $fnum);

    if ($fnum == "") {
        $fnum = 0;
    }
    return $fnum;
}
/**
 * Convert plain text into an HTML fragment.
 *
 * Escapes "<" and ">" as entities and turns every run of CR/LF characters
 * into a single "<br/>" followed by "\r\n".
 *
 * @param string $txt Plain text.
 * @return string Text that is safe to embed in HTML, with line breaks marked up.
 */
function Text2Html($txt){
    // NOTE(review): the published listing shows a space-for-space replacement
    // here, which does nothing. Reconstructed as "two ASCII spaces -> one
    // full-width space (U+3000)" so runs of spaces survive HTML rendering;
    // confirm against the intended source.
    $txt = str_replace("  ", "　", $txt);

    // Escape the angle brackets before any markup of our own is inserted.
    $txt = str_replace(array("<", ">"), array("&lt;", "&gt;"), $txt);

    // Greedy match: "\r\n" is one line break, not two (the original "U"
    // modifier matched each character separately).
    $txt = preg_replace('/[\r\n]+/', "<br/>\r\n", $txt);

    return $txt;
}
/**
 * Neutralise HTML markup by escaping the angle brackets.
 *
 * Only "<" and ">" are rewritten (to "&lt;" and "&gt;"); ampersands and
 * quotes are left untouched.
 *
 * @param string $str Text that may contain HTML tags.
 * @return string The text with "<" and ">" replaced by entities.
 */
function ClearHtml($str){
    // The published listing had the entities decoded back into "<" and ">",
    // which turned both replacements into no-ops.
    return str_replace(array('<', '>'), array('&lt;', '&gt;'), $str);
}
/**
 * Rewrite root-relative href/src attributes into absolute URLs.
 *
 * The scheme ("http://", "https://" or "ftp://") and host are taken from
 * $feed_url. Every double-quoted href="/..." or src="/..." in $content is
 * then prefixed with that scheme and host. Protocol-relative URLs
 * ("//host/...") are already absolute and are left untouched.
 *
 * @param string $content  HTML whose links should be rewritten.
 * @param string $feed_url URL of the page the HTML came from.
 * @return string The rewritten HTML, or $content unchanged when no scheme or
 *                host can be extracted from $feed_url.
 */
function relative_to_absolute($content, $feed_url) {
    preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol);

    // Drop the scheme, then everything from the first "/" on, leaving the host.
    $server_url = preg_replace("/(http|https|ftp|news):\/\//", "", $feed_url);
    $server_url = preg_replace("/\/.*/", "", $server_url);

    if ($server_url == '' || !isset($protocol[0])) {
        return $content;
    }

    $base = $protocol[0] . $server_url . '/';

    // A callback keeps "$" or "\" in the host from being read as a
    // back-reference. The (?!\/) look-ahead skips protocol-relative URLs.
    return preg_replace_callback(
        '/(href|src)="\/(?!\/)/',
        function ($m) use ($base) {
            return $m[1] . '="' . $base;
        },
        $content
    );
}
/**
 * Collect the anchors found in an HTML string.
 *
 * Only anchors whose first attribute is href and whose content contains no
 * ">" (i.e. no nested tags) are matched. The href value may be double-quoted,
 * single-quoted or unquoted.
 *
 * @param string $code HTML source.
 * @return array Two parallel lists: 'name' holds the link texts and 'url'
 *               holds the href values.
 */
function get_all_url($code){
    // The quote classes are ["\'] rather than ["|\']: inside a character
    // class "|" is a literal, so the old pattern accepted it as a quote.
    preg_match_all(
        '/<a\s+href=["\']?([^>"\' ]+)["\']?\s*[^>]*>([^>]+)<\/a>/i',
        $code,
        $arr
    );
    return array('name' => $arr[2], 'url' => $arr[1]);
}
/**
 * Return the text found between two delimiter strings.
 *
 * The result is the part of $str that follows the first occurrence of $start,
 * cut at whichever comes first: the next $start, the next $end, or the end of
 * the string.
 *
 * @param string $str   Text to search.
 * @param string $start Opening delimiter.
 * @param string $end   Closing delimiter.
 * @return string|null The enclosed text; '' when $start does not occur;
 *                     null when either delimiter is empty.
 */
function get_tag_data($str, $start, $end){
    if ($start == '' || $end == '') {
        return;
    }

    $parts = explode($start, $str);
    // Without this guard a missing $start read an undefined offset.
    if (!isset($parts[1])) {
        return '';
    }

    $inner = explode($end, $parts[1]);
    return $inner[0];
}
/**
 * Turn each row of an HTML table into one CSV-style line.
 *
 * Every cell is wrapped in double quotes and cells are joined with commas.
 * All remaining tags, line breaks and spaces are removed. Quotes inside a
 * cell are not escaped.
 *
 * @param string $table HTML of the table.
 * @return array One string per table row, e.g. '"a","b"'.
 */
function get_tr_array($table) {
    $table = preg_replace("'<td[^>]*?>'si", '"', $table);
    // The closing-tag search strings were lost in the published listing
    // (they appeared as ""), so no separators were ever produced.
    $table = str_replace("</td>", '",', $table);
    $table = str_replace("</tr>", "{tr}", $table);

    // Strip whatever HTML tags are left.
    $table = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $table);

    // Remove line breaks and the whitespace that follows them. A lone "\n"
    // is removed too, so Unix line endings do not leak into the rows.
    $table = preg_replace('/[\r\n]\s*/', "", $table);
    // NOTE(review): the listing shows two identical space removals; the
    // second is presumed to have been the "&nbsp;" entity -- confirm.
    $table = str_replace(array(" ", "&nbsp;"), "", $table);

    $table = explode(",{tr}", $table);
    array_pop($table); // drop the remainder after the final row marker
    return $table;
}
/**
 * Convert an HTML table into a two-dimensional array of cell texts.
 *
 * All tags, line breaks and spaces are removed from the cell contents.
 *
 * @param string $table HTML of the table.
 * @return array A list of rows, each a list of cell strings; an empty array
 *               when the input contains no rows.
 */
function get_td_array($table) {
    $table = preg_replace("'<table[^>]*?>'si", "", $table);
    $table = preg_replace("'<tr[^>]*?>'si", "", $table);
    $table = preg_replace("'<td[^>]*?>'si", "", $table);
    // The closing-tag search strings were lost in the published listing
    // (they appeared as ""), so rows and cells were never separated.
    $table = str_replace("</tr>", "{tr}", $table);
    $table = str_replace("</td>", "{td}", $table);

    // Strip whatever HTML tags are left.
    $table = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $table);

    // Remove line breaks and the whitespace that follows them. A lone "\n"
    // is removed too, so Unix line endings do not leak into the cells.
    $table = preg_replace('/[\r\n]\s*/', "", $table);
    // NOTE(review): the listing shows two identical space removals; the
    // second is presumed to have been the "&nbsp;" entity -- confirm.
    $table = str_replace(array(" ", "&nbsp;"), "", $table);

    $rows = explode('{tr}', $table);
    array_pop($rows); // drop the remainder after the final row marker

    // Initialised up front so a table without rows yields an empty array
    // rather than an undefined variable.
    $td_array = array();
    foreach ($rows as $tr) {
        $td = explode('{td}', $tr);
        array_pop($td); // drop the empty piece after the final cell marker
        $td_array[] = $td;
    }
    return $td_array;
}
/**
 * Extract the English words (runs of A-Z / a-z) from a string, sorted.
 *
 * @param string $str      Text to scan.
 * @param bool   $distinct When true (the default), duplicate words are
 *                         removed before sorting.
 * @return array Sorted, zero-indexed list of the words found.
 */
function split_en_str($str, $distinct = true) {
    preg_match_all('/([a-zA-Z]+)/', $str, $match);
    $words = $match[1];

    if ($distinct == true) {
        $words = array_unique($words);
    }

    // sort() also re-indexes, closing the gaps array_unique() leaves behind.
    sort($words);
    return $words;
}
?>
除非特别声明,PHP100新闻均为原创或投稿报道,转载请注明作者及原文链接
原文地址: http://www.php100.com/html/php/hanshu/2013/0903/1039.html