正则表达式:
// Match everything up to (and optionally beyond) the third slash of the URL.
// Layout: ((http or https):) 0-1 times, (//) required, (alphanumeric label) required,
// (.alphanumeric label) 0-n times, (domain suffix) required, (:port) 0-1 times, (/anything) 0-1 times.
$pattern = "/^(((" . $xieyi . ")\:)?\/\/)([a-zA-Z0-9]+)((\.([a-zA-Z0-9]+))$erjin)((" . $houzui . "))((\:([0-9])+)?)((\/.*)?)$/i";
更新:
- 文件类型单独检测
检测类代码:
Net.php
<?php
// @etafort@163.com php正则表达式检测链接是否有效并获取主域名2.0
namespace app\controller;
/**
 * URL validation and registrable-domain extraction based on a fixed list of
 * known domain suffixes.
 *
 * All methods are static and side-effect free.
 */
class Net
{
    /** URL schemes accepted by the validator (lower-case). */
    private static $xieyis = ['http', 'https'];

    /** File extensions (lower-case, without the dot) accepted on the last path segment. */
    private static $page_types = ["html", "htm", "shtml", "css", "xml", "gif", "jpeg", "jpg", "js", "atom", "rss", "mml", "txt", "jad", "wml", "htc", "avif", "png", "svg", "svgz", "tif", "tiff", "wbmp", "webp", "ico", "jng", "bmp", "woff", "woff2", "jar", "war", "ear", "json", "hqx", "doc", "pdf", "ps", "eps", "ai", "rtf", "m3u8", "kml", "kmz", "xls", "eot", "ppt", "odg", "odp", "ods", "odt", "pptx", "xlsx", "docx", "wmlc", "wasm", "7z", "cco", "jardiff", "jnlp", "run", "pl", "pm", "prc", "pdb", "rar", "rpm", "sea", "swf", "sit", "tcl", "tk", "der", "pem", "crt", "xpi", "xhtml", "xspf", "zip", "bin", "exe", "dll", "deb", "dmg", "iso", "img", "msi", "msp", "msm", "mid", "midi", "kar", "mp3", "ogg", "m4a", "ra", "3gpp", "3gp", "ts", "mp4", "mpeg", "mpg", "mov", "webm", "flv", "m4v", "mng", "asx", "asf", "wmv", "avi"];

    /**
     * Page suffixes.
     * NOTE(review): not referenced by any method of this class; kept unchanged.
     * Confirm whether dynamic pages (.php/.asp/.jsp) were meant to be accepted by checkUrl().
     */
    private static $page_houzui = ['.php', '.asp', '.jsp', '.html', '.htm'];

    /** Recognised domain suffixes, each including its leading dot. */
    private static $domain_houzui = ['.ac.cn', '.ah.cn', '.archi', '.art', '.asia', '.auto', '.band', '.beer', '.bio', '.biz', '.bj.cn', '.black', '.blue', '.bond', '.cab', '.cafe', '.cash', '.cc', '.center', '.chat', '.city', '.cloud', '.club', '.cn', '.co', '.com', '.com.cn', '.company', '.cool', '.cq.cn', '.cyou', '.design', '.email', '.fan', '.fans', '.fashion', '.fit', '.fj.cn', '.fun', '.fund', '.fyi', '.games', '.gd.cn', '.global', '.gold', '.gov.cn', '.green', '.group', '.gs.cn', '.guru', '.gx.cn', '.gz.cn', '.ha.cn', '.hb.cn', '.he.cn', '.hi.cn', '.hk', '.hk.cn', '.hl.cn', '.hn.cn', '.host', '.icu', '.info', '.ink', '.jl.cn', '.js.cn', '.jx.cn', '.kim', '.law', '.life', '.link', '.live', '.ln.cn', '.lotto', '.love', '.ltd', '.luxe', '.market', '.mba', '.me', '.media', '.mo.cn', '.mobi', '.name', '.net', '.net.cn', '.news', '.nm.cn', '.nx.cn', '.online', '.org', '.org.cn', '.organic', '.pet', '.pink', '.plus', '.poker', '.press', '.pro', '.promo', '.pub', '.pw', '.qh.cn', '.red', '.ren', '.run', '.s', '.sale', '.sc.cn', '.school', '.sd.cn', '.sh.cn', '.shop', '.shopping', '.show', '.site', '.ski', '.sn.cn', '.social', '.space', '.store', '.studio', '.sx.cn', '.tax', '.team', '.tech', '.technology', '.tj.cn', '.today', '.top', '.tv', '.tw.cn', '.uno', '.video', '.vin', '.vip', '.vote', '.voto', '.wang', '.website', '.wiki', '.work', '.world', '.xin', '.xj.cn', '.xyz', '.xz.cn', '.yn.cn', '.yoga', '.zj.cn', '.zone', '.中国', '.中文网', '.企业', '.佛山', '.信息', '.公司', '.商城', '.商店', '.商标', '.在线', '.娱乐', '.广东', '.我爱你', '.手机', '.招聘', '.游戏', '.移动', '.网址', '.网店', '.网站', '.网络', '.购物', '.集团', '.餐厅'];

    /**
     * Check whether a URL is well-formed and ends in a known domain suffix.
     *
     * Accepted shape: optional "http:" / "https:", a mandatory "//", one or more
     * dot-separated host labels (letters, digits, inner hyphens), a suffix from
     * the known list, an optional ":port" and an optional "/path".
     * If the last path segment (query string and fragment excluded) has a file
     * extension, that extension must be in the allowed file-type list.
     *
     * @param string     $url URL to validate.
     * @param int|string $num Maximum number of host labels allowed in front of the
     *                        domain suffix (2 admits "www.example.com" but not
     *                        "a.www.example.com"). Empty means unlimited.
     *
     * @return bool True when the URL is accepted.
     */
    public static function checkUrl($url, $num = '')
    {
        // The first label is mandatory, so $num labels means at most $num - 1 extra ones.
        // Casting keeps non-numeric input out of the regular expression.
        if (empty($num)) {
            $extraLabels = '*';
        } else {
            $extraLabels = '{0,' . max(0, (int) $num - 1) . '}';
        }

        $schemes = self::alternation(self::$xieyis);
        $suffixes = self::alternation(self::$domain_houzui);
        // Hyphens are legal inside a label but not at either end.
        $label = '[a-z0-9](?:[a-z0-9-]*[a-z0-9])?';

        // "D" stops "$" from matching before a trailing newline.
        $pattern = '/^(?:(?:' . $schemes . '):)?\/\/'
            . $label
            . '(?:\.' . $label . ')' . $extraLabels
            . '(?:' . $suffixes . ')'
            . '(?::[0-9]+)?'
            . '(?P<path>\/.*)?$/iD';

        if (preg_match($pattern, $url, $matches) !== 1) {
            return false;
        }

        // No path at all: a bare domain is always acceptable.
        if (!isset($matches['path']) || $matches['path'] === '') {
            return true;
        }

        // Drop the query string and fragment so they are not mistaken for a file name.
        $path = substr($matches['path'], 0, strcspn($matches['path'], '?#'));

        // $path always starts with "/", so strrpos() cannot return false here.
        $lastSegment = (string) substr($path, strrpos($path, '/') + 1);
        $dot = strrpos($lastSegment, '.');
        if ($dot === false) {
            return true;
        }

        $extension = strtolower((string) substr($lastSegment, $dot + 1));

        return in_array($extension, self::$page_types, true);
    }

    /**
     * Get the known domain suffix of a URL (for example ".com.cn").
     *
     * @param string $url URL to inspect; must have an explicit http/https scheme.
     *
     * @return string|false The longest matching suffix including its leading dot,
     *                      or false when the URL is invalid or has no known suffix.
     */
    public static function getDomainExt($url)
    {
        $host = self::resolveHost($url);
        if ($host === false) {
            return false;
        }

        return $host['suffix'];
    }

    /**
     * Get the main (registrable) domain of a URL: the label directly in front
     * of the known suffix plus the suffix itself, lower-cased.
     *
     * @param string $url URL to inspect; must have an explicit http/https scheme.
     *
     * @return string|false For example "example.com.cn", or false when the URL is
     *                      invalid or has no known suffix.
     */
    public static function getMainDomain($url)
    {
        $host = self::resolveHost($url);
        if ($host === false) {
            return false;
        }

        $labels = $host['labels'];
        $nameIndex = count($labels) - $host['suffix_labels'] - 1;

        return $labels[$nameIndex] . $host['suffix'];
    }

    /**
     * Build a regex alternation ("a|b|c") from literal strings, escaped for "/" delimiters.
     *
     * @param array $items Literal strings.
     *
     * @return string
     */
    private static function alternation(array $items)
    {
        $quoted = array_map(function ($item) {
            return preg_quote($item, '/');
        }, $items);

        return implode('|', $quoted);
    }

    /**
     * Validate a URL, split its host into labels and find the longest known suffix.
     *
     * @param string $url URL to inspect.
     *
     * @return array|false Array with keys "labels" (lower-cased host labels),
     *                     "suffix" (matched suffix with leading dot) and
     *                     "suffix_labels" (number of labels the suffix spans),
     *                     or false when the URL is rejected or no suffix matches.
     */
    private static function resolveHost($url)
    {
        if (self::checkUrl($url) === false) {
            return false;
        }

        $parts = parse_url($url);
        // checkUrl() accepts protocol-relative URLs, so the scheme may be absent here.
        if (!is_array($parts) || empty($parts['host']) || !isset($parts['scheme'])) {
            return false;
        }
        if (!in_array(strtolower($parts['scheme']), self::$xieyis, true)) {
            return false;
        }

        // Host names are case-insensitive; the suffix list is stored in lower case.
        $labels = explode('.', strtolower($parts['host']));
        $total = count($labels);
        if ($total < 2) {
            return false;
        }

        // Grow the candidate suffix from the right. The first label is never part
        // of the suffix, and the last hit is the longest one.
        $candidate = '';
        $suffix = '';
        $suffixLabels = 0;
        for ($i = $total - 1; $i >= 1; $i--) {
            $candidate = '.' . $labels[$i] . $candidate;
            if (in_array($candidate, self::$domain_houzui, true)) {
                $suffix = $candidate;
                $suffixLabels = $total - $i;
            }
        }

        if ($suffix === '') {
            return false;
        }

        return [
            'labels' => $labels,
            'suffix' => $suffix,
            'suffix_labels' => $suffixLabels,
        ];
    }
}
实用场景:
爬取数据