第一章没有什么很难的问题,我个人觉得需要重点学习的就是下面这几个点。
目录
1.1实现类自动加载
看这本书的时候,为了自己方便,我并没有从网上下载本书的源代码,而是看完之后自己敲出来的。所以我的文件目录跟这本书的可能有些不一样,但是问题不大,所有人都能解决。
这是目录结构
Loader.php 代码
<?php
namespace Application\Autoload;
/**
 * Directory-based class autoloader.
 *
 * A class name is turned into a relative file path by replacing namespace
 * separators with DIRECTORY_SEPARATOR and appending ".php". Each registered
 * base directory is probed in order, then the directory that contains this
 * file is tried as a last resort.
 */
class Loader
{
    /** Message prefix of the exception thrown when no file matches a class. */
    const UNABLE_TO_LOAD = 'Unable to load';

    /** Base directories searched by autoLoad(), in registration order. */
    static $dirs = array();

    /** Non-zero once autoLoad() has been handed to spl_autoload_register(). */
    static $registered = 0;

    /**
     * Includes a file if it exists.
     *
     * @param string $file Path of the candidate file.
     * @return bool True when the file exists and was included, false otherwise.
     */
    protected static function loadFile($file)
    {
        if (file_exists($file)) {
            require_once $file;
            return true;
        }
        return false;
    }

    /**
     * Locates and includes the file that should define $class.
     *
     * @param string $class Class name, possibly namespace-qualified.
     * @return bool True when a matching file was included.
     * @throws \Exception When no candidate file exists in any searched directory.
     */
    public static function autoLoad($class)
    {
        $fn = str_replace('\\', DIRECTORY_SEPARATOR, $class) . '.php';

        foreach (self::$dirs as $start) {
            if (self::loadFile($start . DIRECTORY_SEPARATOR . $fn)) {
                return true;
            }
        }

        // Last resort: look next to this file. A hit here is a success too
        // (previously the method reported false even though the file loaded).
        if (self::loadFile(__DIR__ . DIRECTORY_SEPARATOR . $fn)) {
            return true;
        }

        throw new \Exception(self::UNABLE_TO_LOAD . ' ' . $class);
    }

    /**
     * Appends one directory, or an array of directories, to the search list.
     *
     * @param string|array $dirs Directory path or list of directory paths.
     * @return void
     */
    public static function addDirs($dirs)
    {
        if (is_array($dirs)) {
            self::$dirs = array_merge(self::$dirs, $dirs);
        } else {
            self::$dirs[] = $dirs;
        }
    }

    /**
     * Adds search directories and registers the autoloader the first time
     * it is called.
     *
     * @param string|array $dirs Directory path(s) to add; empty adds nothing.
     * @return void
     */
    public static function init($dirs = array())
    {
        if ($dirs) {
            self::addDirs($dirs);
        }
        if (self::$registered == 0) {
            // Spell the method name exactly as declared.
            spl_autoload_register(array(__CLASS__, 'autoLoad'));
            self::$registered++;
        }
    }

    /**
     * Instantiating the loader is equivalent to calling init().
     *
     * @param string|array $dirs Directory path(s) to add.
     */
    public function __construct($dirs = array())
    {
        self::init($dirs);
    }
}
TestClass.php 代码
<?php
namespace Application\Test;
/**
 * Trivial class used to prove that the autoloader can find and load a
 * namespaced class on demand.
 */
class TestClass
{
    /**
     * Reports the name of this method as PHP sees it.
     *
     * @return string The value of the __METHOD__ magic constant.
     */
    public function getTest()
    {
        $qualifiedName = __METHOD__;

        return $qualifiedName;
    }
}
index.php 代码
<?php
// Pull in the autoloader by hand; everything after this is loaded on demand.
require __DIR__ . '/Application/Autoload/Loader.php';

// Register this script's directory as a base directory for class lookups.
Application\Autoload\Loader::init(__DIR__ . '/.');

// No explicit require for TestClass: instantiating it triggers the autoloader.
$instance = new Application\Test\TestClass();
$methodName = $instance->getTest();
echo $methodName;
运行效果
感悟和讲解:此代码的核心在于 Loader.php。Loader 类内部各个方法的功能都很明确:比如 loadFile() 判断类文件是否存在,存在则加载;autoLoad() 把类名转换成文件路径,并依次在各个目录中查找;init() 负责添加目录并注册自动加载函数。
1.2 网站扫描
代码特点:不使用正则表达式来抓取网页内容,而是使用PHP内置的DOMDocument类;
Hoover.php 代码
<?php
namespace Application\Web;
/**
 * Fetches an HTML page into a DOMDocument and extracts tags or attribute
 * values from it without using regular expressions on the markup.
 */
class Hoover
{
    /** @var \DOMDocument|null Document loaded by the most recent fetch. */
    protected $content;

    /** @var string|null Normalised URL that $content was loaded from. */
    protected $contentUrl;

    /**
     * Returns the parsed document for $url, loading it when necessary.
     *
     * The document is cached per URL: asking for the same URL again reuses
     * the cached document, while a different URL triggers a fresh load
     * (previously the first document was returned for every later URL).
     *
     * @param string $url Page address; "http://" is prepended when the value
     *                    does not start with "http".
     * @return \DOMDocument
     */
    public function getContent($url)
    {
        if (stripos($url, 'http') !== 0) {
            $url = 'http://' . $url;
        }

        if (!$this->content || $this->contentUrl !== $url) {
            $document = new \DOMDocument('1.0', 'utf-8');
            // Set the flag on the document itself (the original assigned a
            // misspelled property on the Hoover instance, which had no effect).
            $document->preserveWhiteSpace = false;
            // @ silences the warnings libxml emits for malformed real-world HTML.
            @$document->loadHTMLFile($url);

            $this->content = $document;
            $this->contentUrl = $url;
        }

        return $this->content;
    }

    /**
     * Collects every element named $tag from the page at $url.
     *
     * @param string $url Page address (see getContent()).
     * @param string $tag Element name, e.g. "a" or "img".
     * @return array List of entries, each with a 'value' key (node text with
     *               whitespace runs collapsed to one space and trimmed) and,
     *               when the element has attributes, an 'attributes' map of
     *               name => value.
     */
    public function getTags($url, $tag)
    {
        $result = array();
        $elements = $this->getContent($url)->getElementsByTagName($tag);

        foreach ($elements as $node) {
            $entry = array(
                'value' => trim(preg_replace('/\s+/', ' ', $node->nodeValue)),
            );
            if ($node->hasAttributes()) {
                foreach ($node->attributes as $name => $attr) {
                    $entry['attributes'][$name] = $attr->value;
                }
            }
            $result[] = $entry;
        }

        return $result;
    }

    /**
     * Collects the values of attribute $attr from every element on the page.
     *
     * @param string      $url    Page address (see getContent()).
     * @param string      $attr   Attribute name, e.g. "href".
     * @param string|null $domain When truthy, only values containing this
     *                            string (case-insensitive) are returned.
     * @return array List of trimmed attribute values.
     */
    public function getAttribute($url, $attr, $domain = NULL)
    {
        $result = array();
        $elements = $this->getContent($url)->getElementsByTagName('*');

        foreach ($elements as $node) {
            if (!$node->hasAttribute($attr)) {
                continue;
            }
            $value = $node->getAttribute($attr);
            if (!$domain || stripos($value, $domain) !== FALSE) {
                $result[] = trim($value);
            }
        }

        return $result;
    }
}
vaccuuming_website.php 代码
<?php
// Values used when the query string carries no "url" / "tag" parameter.
define('DEFAULT_URL', 'http://www.baidu.com');
define('DEFAULT_TAG', 'a');

// Load the autoloader manually, then let it resolve every other class.
require __DIR__ . '/Application/Autoload/Loader.php';
Application\Autoload\Loader::init(__DIR__ . '/.');

// Read the request parameters, falling back to the defaults, and strip any
// markup from them before use.
$requestedUrl = $_GET['url'] ?? DEFAULT_URL;
$requestedTag = $_GET['tag'] ?? DEFAULT_TAG;
$url = strip_tags($requestedUrl);
$tag = strip_tags($requestedTag);

// Scan the page and dump every matching tag with its attributes.
$vac = new Application\Web\Hoover();
$tags = $vac->getTags($url, $tag);
echo 'Dump of Tags:' . PHP_EOL;
var_dump($tags);
感悟和讲解:我觉得此代码最大的优点就是不用正则表达式来获取网页内容。
1.3 创建深层次的网页扫描
Deep.php代码
<?php
namespace Application\Web;
/**
 * Two-level scanner: reads the links on a start page, then yields the
 * requested tags from each linked page.
 */
class Deep
{
    /** @var string|null Host name remembered from the first getDomain() call. */
    protected $domain;

    /**
     * Lazily yields every $tag element found on the pages linked from $url.
     *
     * Only href values that contain the start page's host name are followed.
     *
     * @param string $url Start page address.
     * @param string $tag Element name to collect on each linked page.
     * @return \Generator Yields entries shaped like Hoover::getTags() results;
     *                    the generator's return value is the number of links
     *                    that were scanned.
     */
    public function scan($url, $tag)
    {
        $starter = new Hoover();
        $scan = $starter->getAttribute($url, 'href', $this->getDomain($url));

        foreach ($scan as $subSite) {
            // Use a dedicated Hoover per page so that a document cached by
            // one fetch can never be served for a different URL.
            $page = new Hoover();
            yield from $page->getTags($subSite, $tag);
        }

        return count($scan);
    }

    /**
     * Returns the host part of $url, remembering the first non-empty result.
     *
     * parse_url() finds no host in a scheme-less string such as
     * "example.com/path", so "http://" is prepended when the value does not
     * start with "http" — the same normalisation Hoover applies before
     * loading. Without it the domain filter in scan() was silently disabled.
     *
     * @param string $url Address to extract the host from.
     * @return string|null Host name, or null when none can be determined.
     */
    public function getDomain($url)
    {
        if (!$this->domain) {
            if (stripos($url, 'http') !== 0) {
                $url = 'http://' . $url;
            }
            $this->domain = parse_url($url, PHP_URL_HOST);
        }

        return $this->domain;
    }
}
deep_scan_website.php代码
<?php
// Load the autoloader manually, then let it resolve every other class.
require __DIR__ . '/Application/Autoload/Loader.php';

// Values used when the query string carries no "url" / "tag" parameter.
define('DEFAULT_URL', '588ku.com');
define('DEFAULT_TAG', 'img');

Application\Autoload\Loader::init(__DIR__ . '/.');

$deep = new Application\Web\Deep();
$url = strip_tags($_GET['url'] ?? DEFAULT_URL);
$tag = strip_tags($_GET['tag'] ?? DEFAULT_TAG);

// Print every PNG/JPG image found on the pages linked from the start page.
foreach ($deep->scan($url, $tag) as $item) {
    $src = $item['attributes']['src'] ?? NULL;
    if (!$src) {
        continue;
    }
    // Compare against false explicitly: stripos() returns 0 for a match at
    // the start of the string, which a bare truthiness test treats as a miss.
    $isImage = stripos($src, 'png') !== false || stripos($src, 'jpg') !== false;
    if ($isImage) {
        // $src comes from a scraped third-party page; escape it before
        // embedding it in a single-quoted HTML attribute.
        printf("<br><img src='%s'/>\n", htmlspecialchars($src, ENT_QUOTES, 'UTF-8'));
    }
}