PHP7编程实践学习笔记（第一章）

最新推荐文章于 2024-04-24 14:40:49 发布

xerzatxerzat0320

最新推荐文章于 2024-04-24 14:40:49 发布

阅读量116

点赞数

文章标签： PHP7 编程实践 Doug Bierer 学习笔记

本文链接：https://blog.csdn.net/xerzatxerzat0320/article/details/90055730

版权

第一章没有什么很难的问题，我个人觉得需要重点学习的就是下面这几个知识点。

1.1实现类自动加载

1.2网站扫描

1.3创建深层次的网页扫描器

1.1实现类自动加载

看这本书的时候，为了自己方便，我并没有从网上下载本书的源代码，而是看完之后自己敲出来的。所以我的文件目录跟这本书的可能有些不一样，但是问题不大，所有人都能解决。

这是目录结构

Loader.php 代码

<?php
namespace Application\Autoload;
/**
 * Class autoloader driven by a static list of base directories.
 *
 * A fully-qualified class name is turned into a relative path by replacing
 * namespace separators with DIRECTORY_SEPARATOR and appending ".php". That
 * path is looked up in every registered base directory and, as a last resort,
 * in the directory containing this file.
 */
class Loader
{
    /** Message prefix of the exception thrown when no file can be found for a class. */
    const UNABLE_TO_LOAD = 'Unable to load';

    /** @var string[] Base directories searched by autoLoad(), in insertion order. */
    public static $dirs = [];

    /** @var int Becomes non-zero once autoLoad() has been registered with SPL. */
    public static $registered = 0;

    /**
     * Includes a file if it exists.
     *
     * @param string $file Path of the file to include.
     *
     * @return bool True when the file exists and was included, false otherwise.
     */
    protected static function loadFile($file)
    {
        if (!file_exists($file)) {
            return false;
        }

        require_once $file;

        return true;
    }

    /**
     * Locates and includes the file that defines the given class.
     *
     * @param string $class Fully-qualified class name.
     *
     * @return bool True when a matching file was included.
     *
     * @throws \Exception When no matching file exists in any search location.
     */
    public static function autoLoad($class)
    {
        $fn = str_replace('\\', DIRECTORY_SEPARATOR, $class) . '.php';

        foreach (self::$dirs as $start) {
            if (self::loadFile($start . DIRECTORY_SEPARATOR . $fn)) {
                return true;
            }
        }

        // Last resort: resolve relative to the directory of this file. A hit
        // here is reported as success as well (it used to return false).
        if (self::loadFile(__DIR__ . DIRECTORY_SEPARATOR . $fn)) {
            return true;
        }

        throw new \Exception(self::UNABLE_TO_LOAD . ' ' . $class);
    }

    /**
     * Appends one or more base directories to the search list.
     *
     * @param string|string[] $dirs A single directory or a list of directories.
     */
    public static function addDirs($dirs)
    {
        if (is_array($dirs)) {
            self::$dirs = array_merge(self::$dirs, $dirs);
        } else {
            self::$dirs[] = $dirs;
        }
    }

    /**
     * Adds the given directories and registers autoLoad() with SPL.
     *
     * Registration happens at most once, however often init() is called.
     *
     * @param string|string[] $dirs Directories to add; an empty value adds nothing.
     */
    public static function init($dirs = [])
    {
        if ($dirs) {
            self::addDirs($dirs);
        }

        if (self::$registered === 0) {
            spl_autoload_register([__CLASS__, 'autoLoad']);
            self::$registered++;
        }
    }

    /**
     * Instance-style entry point; simply delegates to init().
     *
     * @param string|string[] $dirs Directories to add to the search list.
     */
    public function __construct($dirs = [])
    {
        self::init($dirs);
    }
}

TestClass.php 代码

<?php
namespace Application\Test;
/**
 * Minimal class whose only method reports its own qualified name.
 */
class TestClass
{
    /**
     * Builds the name of this method, qualified with the class name.
     *
     * @return string Class name and method name joined by "::".
     */
    public function getTest()
    {
        $qualifiedName = __CLASS__ . '::' . __FUNCTION__;

        return $qualifiedName;
    }
}

index.php 代码

<?php   
use Application\Autoload\Loader;
use Application\Test\TestClass;

// The autoloader cannot load itself, so it is included by hand.
require __DIR__ . '/Application/Autoload/Loader.php';

// Register the autoloader with this script's directory as the search root.
Loader::init(__DIR__ . '/.');

// TestClass is never required explicitly; instantiating it triggers the autoloader.
$test = new TestClass();
echo $test->getTest();

运行效果

感悟和讲解：这部分的核心在 Loader.php 上，Loader 类内各个方法的功能都很明确。比如 loadFile() 判断类文件是否存在并加载，autoLoad() 把类名转换成文件路径并在各个目录中查找，addDirs() 添加搜索目录，init() 注册自动加载函数。

1.2 网站扫描

代码特点：不用正则表达式来做爬虫，而是使用PHP内置的DOMDocument类。

Hoover.php 代码

<?php
namespace Application\Web;
/**
 * Loads an HTML page into a DOMDocument and extracts tags or attribute values
 * from it without using regular expressions.
 */
class Hoover
{
    /** @var \DOMDocument|null Most recently loaded document. */
    protected $content;

    /** @var string|null Normalized URL that $content was loaded from. */
    protected $contentUrl;

    /**
     * Returns the parsed document for a URL, loading it when necessary.
     *
     * URLs that do not start with "http" are prefixed with "http://". The
     * document is cached per URL: asking for the same URL again reuses it,
     * while a different URL triggers a fresh load. Previously the first
     * document was returned for every later URL.
     *
     * @param string $url Address of the page to load.
     *
     * @return \DOMDocument The document; it may be empty if loading failed.
     */
    public function getContent($url)
    {
        if (stripos($url, 'http') !== 0) {
            $url = 'http://' . $url;
        }

        if (!$this->content || $this->contentUrl !== $url) {
            $document = new \DOMDocument('1.0', 'utf-8');
            // Set on the document itself; the original assigned a misspelled
            // property on this object, which had no effect on the document.
            $document->preserveWhiteSpace = false;
            // Deliberately best-effort: warnings from malformed HTML or
            // unreachable hosts are suppressed and an empty document results.
            @$document->loadHTMLFile($url);

            $this->content = $document;
            $this->contentUrl = $url;
        }

        return $this->content;
    }

    /**
     * Collects every element with the given tag name.
     *
     * @param string $url Address of the page to scan.
     * @param string $tag Tag name to look for, e.g. "a" or "img".
     *
     * @return array[] One entry per element: 'value' holds the node text with
     *                 whitespace runs collapsed to single spaces and trimmed;
     *                 'attributes' (only present when the element has any)
     *                 maps attribute names to their values.
     */
    public function getTags($url, $tag)
    {
        $result = [];

        foreach ($this->getContent($url)->getElementsByTagName($tag) as $node) {
            $entry = ['value' => trim(preg_replace('/\s+/', ' ', $node->nodeValue))];

            if ($node->hasAttributes()) {
                foreach ($node->attributes as $name => $attr) {
                    $entry['attributes'][$name] = $attr->value;
                }
            }

            $result[] = $entry;
        }

        return $result;
    }

    /**
     * Collects the values of one attribute across all elements of a page.
     *
     * @param string      $url    Address of the page to scan.
     * @param string      $attr   Attribute name, e.g. "href".
     * @param string|null $domain When non-empty, only values containing this
     *                            string (case-insensitive) are returned.
     *
     * @return string[] Trimmed attribute values in document order.
     */
    public function getAttribute($url, $attr, $domain = null)
    {
        $result = [];

        foreach ($this->getContent($url)->getElementsByTagName('*') as $node) {
            if (!$node->hasAttribute($attr)) {
                continue;
            }

            $value = $node->getAttribute($attr);
            if ($domain && stripos($value, $domain) === false) {
                continue;
            }

            $result[] = trim($value);
        }

        return $result;
    }
}

vaccuuming_website.php 代码

<?php
use Application\Autoload\Loader;
use Application\Web\Hoover;

// Fallbacks used when the request carries no "url" / "tag" query parameter.
define('DEFAULT_URL', 'http://www.baidu.com');
define('DEFAULT_TAG', 'a');

// The autoloader is included by hand, then rooted at this script's directory.
require __DIR__ . '/Application/Autoload/Loader.php';
Loader::init(__DIR__ . '/.');

$vac = new Hoover();

// Query parameters win over the defaults; markup is stripped from both.
$url = strip_tags($_GET['url'] ?? DEFAULT_URL);
$tag = strip_tags($_GET['tag'] ?? DEFAULT_TAG);

// Dump every matching tag found on the page.
echo 'Dump of Tags:', PHP_EOL;
var_dump($vac->getTags($url, $tag));

感悟和讲解：我觉得这段代码最大的优点就是不用正则表达式来获取网页内容。

1.3 创建深层次的网页扫描器

Deep.php代码

<?php
namespace Application\Web;

/**
 * Scans a page for links that mention the page's own host and then collects
 * a given tag from every linked page.
 */
class Deep
{
    /** @var string|null Host name memoized by the first successful getDomain() call. */
    protected $domain;

    /**
     * Lazily yields the tags found on every same-domain page linked from $url.
     *
     * The items come from Hoover::getTags() via "yield from", so integer keys
     * restart for every linked page. Iterate the values with foreach rather
     * than collecting them with keys preserved.
     *
     * @param string $url Start page whose href attributes are followed.
     * @param string $tag Tag name to collect on each linked page.
     *
     * @return \Generator Yields one tag entry at a time; the generator's
     *                    return value (getReturn()) is the number of links
     *                    that were scanned.
     */
    public function scan($url, $tag)
    {
        $vac = new Hoover();
        $scan = $vac->getAttribute($url, 'href', $this->getDomain($url));

        foreach ($scan as $subSite) {
            yield from $vac->getTags($subSite, $tag);
        }

        return count($scan);
    }

    /**
     * Returns the host part of a URL, memoizing the first result.
     *
     * parse_url() reports no host for scheme-less input such as
     * "example.com/path", which left the domain filter in scan() disabled.
     * Such input is retried with an "http://" prefix. Once a host is stored,
     * later calls return it regardless of the argument.
     *
     * @param string $url URL to take the host from.
     *
     * @return string|null The host, or null when none can be determined.
     */
    public function getDomain($url)
    {
        if (!$this->domain) {
            $host = parse_url($url, PHP_URL_HOST);
            if (!$host) {
                $host = parse_url('http://' . $url, PHP_URL_HOST);
            }

            $this->domain = $host ?: null;
        }

        return $this->domain;
    }
}

deep_scan_website.php代码

<?php
// The autoloader is included by hand, then rooted at this script's directory.
require __DIR__ . '/Application/Autoload/Loader.php';

// Fallbacks used when the request carries no "url" / "tag" query parameter.
define('DEFAULT_URL', '588ku.com');
define('DEFAULT_TAG', 'img');

Application\Autoload\Loader::init(__DIR__ . '/.');

$deep = new Application\Web\Deep();
$url = strip_tags($_GET['url'] ?? DEFAULT_URL);
$tag = strip_tags($_GET['tag'] ?? DEFAULT_TAG);

// Render every PNG/JPG image found on the pages linked from the start URL.
foreach ($deep->scan($url, $tag) as $item) {
    $src = $item['attributes']['src'] ?? null;
    if (!$src) {
        continue;
    }

    // stripos() returns 0 for a match at the very start, so compare against
    // false explicitly instead of relying on truthiness.
    $isImage = stripos($src, 'png') !== false || stripos($src, 'jpg') !== false;
    if (!$isImage) {
        continue;
    }

    // $src comes from remote HTML; escape it (including single quotes, since
    // the attribute is single-quoted) before writing it into this page.
    printf("<br><img src='%s'/>\n", htmlspecialchars($src, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'));
}

xerzatxerzat0320

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
PHP7编程实践学习笔记（第一章）

第一章没啥很难得问题，我个人觉得要学习重点的点就这几个。目录1.1实现类自动加载1.2网站扫描1.3创建深层次的网页扫描器1.1实现类自动加载看这本书的时候，为自己方便，我并没有网上下载本书的源代码，而是看完自己敲出来的。所以我的文件目录跟这本书的可能有些不一样，但是问题不大，所有人都能决绝。这是目录结构Loader.php 代码<...
复制链接

扫一扫