<?php
// Load the project configuration. Presumably this is what provides the Proxy
// class used below — TODO confirm. include_once avoids loading the file a
// second time if it has already been included.
include_once "../../config/common.php";

// Pattern capturing the inner HTML of <div id="divhangye">. The capture is
// non-greedy, so it ends at the first closing </div>.
if (!defined("PREG_HREF_CAT")) {
    define("PREG_HREF_CAT", "/<div id=\"divhangye\">(.*?)<\/div>/is");
}

$proxy = new Proxy();

// Site root that relative links are resolved against; it never changes, so it
// is set once instead of on every loop iteration.
$base = 'http://www.cn360cn.com/';

// Visit listing pages province_1.aspx .. province_31.aspx and store each
// link that get_cat() extracts from them.
for ($page = 1; $page < 32; $page++) {
    $url = $base . "province_" . $page . ".aspx";
    $cats = get_cat($url);
    if (empty($cats)) {
        // get_cat() returned false or an empty list: nothing to store.
        continue;
    }
    foreach ($cats as $item) {
        $item = trim($item);
        if ($item === '') {
            // An empty href would otherwise be stored as the bare site root.
            continue;
        }
        // Absolute links are kept unchanged; relative ones are joined to the
        // site root without producing a double slash.
        if (!preg_match('#^https?://#i', $item)) {
            $item = $base . ltrim($item, '/');
        }
        // NOTE(review): addslashes() is not a charset-aware SQL escape. If the
        // Proxy class offers escaping or prepared statements, use that instead.
        $item = addslashes($item);
        $sql = "insert ignore into cn360_cat set url='{$item}' ";
        $proxy->sql_query($sql);
    }
}
/**
 * Download a page and collect the href of every anchor found inside the
 * block matched by the PREG_HREF_CAT pattern.
 *
 * The page is read with file_get_contents(), converted from GB2312 to UTF-8,
 * and then scanned with regular expressions.
 *
 * @param string|null $url Address (or path) of the page to read.
 *
 * @return array|false The href values as written in the markup (the list may
 *                     be empty), or false when no URL was given, the page
 *                     could not be read, or the block was not found.
 */
function get_cat($url = null) {
    // file_get_contents() rejects an empty path (ValueError on PHP 8), so the
    // default/empty argument is answered here instead.
    if ($url === null || $url === '') {
        return false;
    }

    $content = file_get_contents($url);
    if ($content === false || $content === '') {
        // Read failed or the page was empty: there is nothing to parse.
        return false;
    }
    $content = mb_convert_encoding($content, "utf-8", "gb2312");

    if (!preg_match(PREG_HREF_CAT, $content, $block)) {
        return false;
    }

    // Accept href anywhere in the tag, with double-quoted, single-quoted or
    // bare values, and stop the capture at the end of the value so that
    // trailing attributes (title=, target=, ...) never leak into the URL.
    // The branch-reset group (?|...) makes every alternative fill group 1.
    $anchorPattern = '~<a\s+(?:[^>]*?\s)?href\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\s"\'>]*))[^>]*>.*?</a>~is';
    preg_match_all($anchorPattern, $block[1], $urls);

    return isset($urls[1]) ? $urls[1] : false;
}
// NOTE(review): from here on the file repeats the bootstrap and crawl that
// already appear above; this looks like an accidental duplicate paste —
// confirm whether the second pass is wanted at all.

// include_once skips the configuration file when it has already been loaded,
// so a repeated bootstrap does not load it a second time.
include_once "../../config/common.php";

// Pattern capturing the inner HTML of <div id="divhangye">. The capture is
// non-greedy, so it ends at the first closing </div>. Guarded because the
// constant is also defined earlier in this file.
if (!defined("PREG_HREF_CAT")) {
    define("PREG_HREF_CAT", "/<div id=\"divhangye\">(.*?)<\/div>/is");
}

$proxy = new Proxy();

// Site root that relative links are resolved against; it never changes, so it
// is set once instead of on every loop iteration.
$base = 'http://www.cn360cn.com/';

// Visit listing pages province_1.aspx .. province_31.aspx and store each
// link that get_cat() extracts from them.
for ($page = 1; $page < 32; $page++) {
    $url = $base . "province_" . $page . ".aspx";
    $cats = get_cat($url);
    if (empty($cats)) {
        // get_cat() returned false or an empty list: nothing to store.
        continue;
    }
    foreach ($cats as $item) {
        $item = trim($item);
        if ($item === '') {
            // An empty href would otherwise be stored as the bare site root.
            continue;
        }
        // Absolute links are kept unchanged; relative ones are joined to the
        // site root without producing a double slash.
        if (!preg_match('#^https?://#i', $item)) {
            $item = $base . ltrim($item, '/');
        }
        // NOTE(review): addslashes() is not a charset-aware SQL escape. If the
        // Proxy class offers escaping or prepared statements, use that instead.
        $item = addslashes($item);
        $sql = "insert ignore into cn360_cat set url='{$item}' ";
        $proxy->sql_query($sql);
    }
}
// get_cat() is also declared earlier in this file; a second unconditional
// declaration is a fatal "Cannot redeclare" error, so this copy is only
// defined when the function does not exist yet.
if (!function_exists('get_cat')) {
    /**
     * Download a page and collect the href of every anchor found inside the
     * block matched by the PREG_HREF_CAT pattern.
     *
     * The page is read with file_get_contents(), converted from GB2312 to
     * UTF-8, and then scanned with regular expressions.
     *
     * @param string|null $url Address (or path) of the page to read.
     *
     * @return array|false The href values as written in the markup (the list
     *                     may be empty), or false when no URL was given, the
     *                     page could not be read, or the block was not found.
     */
    function get_cat($url = null) {
        // file_get_contents() rejects an empty path (ValueError on PHP 8), so
        // the default/empty argument is answered here instead.
        if ($url === null || $url === '') {
            return false;
        }

        $content = file_get_contents($url);
        if ($content === false || $content === '') {
            // Read failed or the page was empty: there is nothing to parse.
            return false;
        }
        $content = mb_convert_encoding($content, "utf-8", "gb2312");

        if (!preg_match(PREG_HREF_CAT, $content, $block)) {
            return false;
        }

        // Accept href anywhere in the tag, with double-quoted, single-quoted
        // or bare values, and stop the capture at the end of the value so that
        // trailing attributes (title=, target=, ...) never leak into the URL.
        // The branch-reset group (?|...) makes every alternative fill group 1.
        $anchorPattern = '~<a\s+(?:[^>]*?\s)?href\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\s"\'>]*))[^>]*>.*?</a>~is';
        preg_match_all($anchorPattern, $block[1], $urls);

        return isset($urls[1]) ? $urls[1] : false;
    }
}