php正则表达式检测链接是否有效并获取主域名2.0

正则表达式:

//匹配第三个/之前部分
//((http或者https):)0~1次 (//)固定 (字母数字字符串)固定 (.字母数字字符串)0~n次 (域名后缀)固定 (:端口号)0~1次 (/所有)0~1次
$pattern = "/^(((" . $xieyi . ")\:)?\/\/)([a-zA-Z0-9]+)((\.([a-zA-Z0-9]+))$erjin)((" . $houzui . "))((\:([0-9])+)?)((\/.*)?)$/i";

更新:

  • 文件类型单独检测

检测类代码:

 Net.php

<?php
// @etafort@163.com php正则表达式检测链接是否有效并获取主域名2.0
namespace app\controller;
/**
 * URL validation and registrable-domain extraction based on a fixed list of
 * known domain suffixes.
 *
 * All methods are static. Validation is purely syntactic (regular expression
 * plus suffix / file-extension whitelists); no network access is performed.
 */
class Net
{
    // Schemes accepted in front of "//".
    private static $xieyis = ['http', 'https'];
    // File extensions accepted for the last path segment of a URL.
    private static $page_types = ["html", "htm", "shtml", "css", "xml", "gif", "jpeg", "jpg", "js", "atom", "rss", "mml", "txt", "jad", "wml", "htc", "avif", "png", "svg", "svgz", "tif", "tiff", "wbmp", "webp", "ico", "jng", "bmp", "woff", "woff2", "jar", "war", "ear", "json", "hqx", "doc", "pdf", "ps", "eps", "ai", "rtf", "m3u8", "kml", "kmz", "xls", "eot", "ppt", "odg", "odp", "ods", "odt", "pptx", "xlsx", "docx", "wmlc", "wasm", "7z", "cco", "jardiff", "jnlp", "run", "pl", "pm", "prc", "pdb", "rar", "rpm", "sea", "swf", "sit", "tcl", "tk", "der", "pem", "crt", "xpi", "xhtml", "xspf", "zip", "bin", "exe", "dll", "deb", "dmg", "iso", "img", "msi", "msp", "msm", "mid", "midi", "kar", "mp3", "ogg", "m4a", "ra", "3gpp", "3gp", "ts", "mp4", "mpeg", "mpg", "mov", "webm", "flv", "m4v", "mng", "asx", "asf", "wmv", "avi"];
    // NOTE(review): not referenced anywhere in this class. As a consequence
    // ".php", ".asp" and ".jsp" URLs are rejected by checkUrl() because those
    // extensions are absent from $page_types - confirm whether that is intended.
    private static $page_houzui = ['.php', '.asp', '.jsp', '.html', '.htm'];
    // Known domain suffixes (each with its leading dot).
    // NOTE(review): '.s' looks like a typo - verify against the intended list.
    private static $domain_houzui = ['.ac.cn', '.ah.cn', '.archi', '.art', '.asia', '.auto', '.band', '.beer', '.bio', '.biz', '.bj.cn', '.black', '.blue', '.bond', '.cab', '.cafe', '.cash', '.cc', '.center', '.chat', '.city', '.cloud', '.club', '.cn', '.co', '.com', '.com.cn', '.company', '.cool', '.cq.cn', '.cyou', '.design', '.email', '.fan', '.fans', '.fashion', '.fit', '.fj.cn', '.fun', '.fund', '.fyi', '.games', '.gd.cn', '.global', '.gold', '.gov.cn', '.green', '.group', '.gs.cn', '.guru', '.gx.cn', '.gz.cn', '.ha.cn', '.hb.cn', '.he.cn', '.hi.cn', '.hk', '.hk.cn', '.hl.cn', '.hn.cn', '.host', '.icu', '.info', '.ink', '.jl.cn', '.js.cn', '.jx.cn', '.kim', '.law', '.life', '.link', '.live', '.ln.cn', '.lotto', '.love', '.ltd', '.luxe', '.market', '.mba', '.me', '.media', '.mo.cn', '.mobi', '.name', '.net', '.net.cn', '.news', '.nm.cn', '.nx.cn', '.online', '.org', '.org.cn', '.organic', '.pet', '.pink', '.plus', '.poker', '.press', '.pro', '.promo', '.pub', '.pw', '.qh.cn', '.red', '.ren', '.run', '.s', '.sale', '.sc.cn', '.school', '.sd.cn', '.sh.cn', '.shop', '.shopping', '.show', '.site', '.ski', '.sn.cn', '.social', '.space', '.store', '.studio', '.sx.cn', '.tax', '.team', '.tech', '.technology', '.tj.cn', '.today', '.top', '.tv', '.tw.cn', '.uno', '.video', '.vin', '.vip', '.vote', '.voto', '.wang', '.website', '.wiki', '.work', '.world', '.xin', '.xj.cn', '.xyz', '.xz.cn', '.yn.cn', '.yoga', '.zj.cn', '.zone', '.中国', '.中文网', '.企业', '.佛山', '.信息', '.公司', '.商城', '.商店', '.商标', '.在线', '.娱乐', '.广东', '.我爱你', '.手机', '.招聘', '.游戏', '.移动', '.网址', '.网店', '.网站', '.网络', '.购物', '.集团', '.餐厅'];


    /**
     * Check whether a URL is syntactically acceptable.
     *
     * Accepted shape: optional "http:" / "https:", a mandatory "//", a host
     * made of dot-separated labels that ends in one of the known domain
     * suffixes, an optional ":port" and an optional "/..." remainder. When the
     * last path segment contains a dot, its extension must be one of the
     * whitelisted file types (compared case-insensitively).
     *
     * @param string     $url URL to check.
     * @param int|string $num Empty (default) puts no limit on the number of
     *                        labels between the first label and the suffix;
     *                        otherwise at most ($num - 1) such labels are
     *                        allowed.
     * @return bool True when the URL passes every check.
     */
    public static function checkUrl($url, $num = '')
    {
        if (!is_string($url) || $url === '') {
            return false;
        }

        // Cast and clamp so the value spliced into the pattern is always a
        // valid, non-negative quantifier bound.
        if (empty($num)) {
            $erjin = '*';
        } else {
            $erjin = '{0,' . max(0, (int) $num - 1) . '}';
        }

        // Build the scheme and suffix alternations.
        $quote = function ($item) {
            return preg_quote($item, '/');
        };
        $xieyi = implode('|', array_map($quote, self::$xieyis));
        $houzui = implode('|', array_map($quote, self::$domain_houzui));

        // A host label: letters and digits, hyphens allowed only in the middle.
        $label = '[a-zA-Z0-9](?:[a-zA-Z0-9\-]*[a-zA-Z0-9])?';

        // (scheme:)? // label (.label){erjin} suffix (:port)? (/anything)?
        $pattern = '/^(?:(?:' . $xieyi . ')\:)?\/\/' . $label
            . '(?:\.' . $label . ')' . $erjin
            . '(?:' . $houzui . ')(?:\:[0-9]+)?(?:\/.*)?$/i';

        if (preg_match($pattern, $url) !== 1) {
            return false;
        }

        // File-type check. Query string and fragment are cut off first so
        // that dots or slashes inside them cannot be mistaken for a file
        // extension or an extra path segment.
        $withoutQuery = substr($url, 0, strcspn($url, '?#'));
        $segments = explode('/', $withoutQuery);

        // "scheme:", "" and the host are the first three pieces: no path.
        if (count($segments) <= 3) {
            return true;
        }

        $last = $segments[count($segments) - 1];
        if (strpos($last, '.') === false) {
            // The last segment has no extension to validate.
            return true;
        }

        $pieces = explode('.', $last);
        $ext = strtolower($pieces[count($pieces) - 1]);

        return in_array($ext, self::$page_types, true);
    }


    /**
     * Get the known domain suffix of a URL's host (e.g. ".com.cn").
     *
     * @param string $url URL to inspect; it must pass checkUrl() and carry an
     *                    http or https scheme.
     * @return string|false The longest matching suffix, or false when the URL
     *                      is rejected or no suffix matches.
     */
    public static function getDomainExt($url)
    {
        $parts = self::splitHost($url);
        if ($parts === false) {
            return false;
        }
        return $parts[1];
    }

    /**
     * Get the main domain of a URL: the label directly in front of the
     * longest known suffix, followed by that suffix (e.g. "example.com.cn").
     *
     * @param string $url URL to inspect; it must pass checkUrl() and carry an
     *                    http or https scheme.
     * @return string|false The main domain, or false when the URL is rejected
     *                      or no suffix matches.
     */
    public static function getMainDomain($url)
    {
        $parts = self::splitHost($url);
        if ($parts === false) {
            return false;
        }
        return $parts[0] . $parts[1];
    }

    /**
     * Validate the URL and split its host into the label preceding the
     * longest known suffix and that suffix.
     *
     * @param string $url URL to inspect.
     * @return array|false [label, suffix] on success, false otherwise.
     */
    private static function splitHost($url)
    {
        if (self::checkUrl($url) === false) {
            return false;
        }

        $parsed = parse_url($url);
        if ($parsed === false || empty($parsed['host'])) {
            return false;
        }

        // Protocol-relative URLs pass checkUrl() but have no scheme; they are
        // rejected here without reading an undefined array key.
        $scheme = isset($parsed['scheme']) ? strtolower($parsed['scheme']) : '';
        if (!in_array($scheme, self::$xieyis, true)) {
            return false;
        }

        // Host names are case-insensitive while the suffix list is lowercase.
        $host = strtolower($parsed['host']);
        if (strpos($host, '.') === false) {
            return false;
        }

        $labels = explode('.', $host);
        $total = count($labels);

        // Try candidate suffixes from longest to shortest. The first label is
        // never part of the suffix, so the search starts at index 1.
        for ($i = 1; $i < $total; $i++) {
            $candidate = '.' . implode('.', array_slice($labels, $i));
            if (in_array($candidate, self::$domain_houzui, true)) {
                return [$labels[$i - 1], $candidate];
            }
        }

        return false;
    }

}


实用场景:

爬取数据

  • 16
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

etafort

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值