数据库的表结构(原文此处的表结构图缺失;由下文的 INSERT 语句可知,表 t_zd_region 至少包含 id、re_name、re_parent 三个字段):
PHP 爬虫爬取第一级:
<?php
// Crawls the first-level region list from res.42du.cn/region/init and inserts
// every entry (re_code, re_name, re_parent) into the t_zd_region table.
$con = mysql_connect("127.0.0.1:3306", "root", "root");
if (!$con) {
    die('Could not connect: ' . mysql_error());
}
if (!mysql_select_db("fruitdb", $con)) {
    die('Error: ' . mysql_error());
}

$url = "http://res.42du.cn/region/init";
$content = file_get_contents($url);
if ($content === false) {
    // Nothing else to do without the payload; fail with a clear message
    // instead of feeding false into json_decode/foreach.
    die('Error: could not fetch ' . $url);
}

$citys = json_decode($content, true);
if (!is_array($citys)) {
    die('Error: response from ' . $url . ' is not a JSON object/array');
}

// The payload is iterated two levels deep: each top-level value is itself a
// list of region records.
foreach ($citys as $group) {
    if (!is_array($group)) {
        continue;
    }
    foreach ($group as $region) {
        // A record without a region code cannot be stored under a usable id.
        if (!is_array($region) || !isset($region["re_code"])) {
            continue;
        }

        // Values are converted from UTF-8 to GB2312 before storage, as in the
        // original script. Missing/null name or parent is stored as ''.
        $id = iconv("utf-8", "gb2312//IGNORE", $region["re_code"]);
        $re_name = iconv("utf-8", "gb2312//IGNORE", isset($region["re_name"]) ? $region["re_name"] : '');
        $re_parent = iconv("utf-8", "gb2312//IGNORE", isset($region["re_parent"]) ? $region["re_parent"] : '');

        // Escape every value: the data comes from a remote service, and the
        // converted bytes may contain quote or backslash bytes that would
        // otherwise break (or alter) the SQL statement.
        $sql = "INSERT INTO t_zd_region (id, re_name, re_parent) VALUES ('"
            . mysql_real_escape_string($id, $con) . "','"
            . mysql_real_escape_string($re_name, $con) . "','"
            . mysql_real_escape_string($re_parent, $con) . "')";

        if (!mysql_query($sql, $con)) {
            die('Error: ' . mysql_error());
        }
        echo "1 record added";
        echo "<br>";
    }
}

mysql_close($con);
?>
爬虫爬取第二、三、四级:
<?php
// For every stored region whose id is 4 characters long, fetches its child
// regions from res.42du.cn/region/city/<id> and inserts them into t_zd_region.
$con = mysql_connect("192.168.1.26:3306", "root", "root");
if (!$con) {
    die('Could not connect: ' . mysql_error());
}
if (!mysql_select_db("fruitdb", $con)) {
    die('Error: ' . mysql_error());
}

// Run the SELECT once and keep its result (it was previously executed twice).
$sql = "select * from t_zd_region where length(id)=4";
$result = mysql_query($sql, $con);
if (!$result) {
    die('Error: ' . mysql_error());
}

// Fetch at the top of each iteration: this processes the first row too and
// never runs the body with an exhausted (false) row, which used to request
// the URL with an empty id.
while ($row = mysql_fetch_assoc($result)) {
    // NOTE(review): the key case ('Id') must match the column name exactly as
    // defined in the table, which is not visible here - confirm.
    $url = "http://res.42du.cn/region/city/" . $row['Id'];
    $content = file_get_contents($url);
    $citys = ($content === false) ? null : json_decode($content, true);

    // A failed fetch or unparsable payload skips this parent region only.
    if (is_array($citys)) {
        // The payload is iterated two levels deep: each top-level value is
        // itself a list of region records.
        foreach ($citys as $group) {
            if (!is_array($group)) {
                continue;
            }
            foreach ($group as $region) {
                // A record without a region code cannot be stored.
                if (!is_array($region) || !isset($region["re_code"])) {
                    continue;
                }

                // Values are converted from UTF-8 to GBK before storage, as in
                // the original script. Missing/null fields are stored as ''.
                $id = iconv("utf-8", "GBK//IGNORE", $region["re_code"]);
                $re_name = iconv("utf-8", "GBK//IGNORE", isset($region["re_name"]) ? $region["re_name"] : '');
                $re_parent = iconv("utf-8", "GBK//IGNORE", isset($region["re_parent"]) ? $region["re_parent"] : '');

                // Escape remote data so quote/backslash bytes in the converted
                // text cannot break or alter the statement.
                $insert = "INSERT INTO t_zd_region (id, re_name, re_parent) VALUES ('"
                    . mysql_real_escape_string($id, $con) . "','"
                    . mysql_real_escape_string($re_name, $con) . "','"
                    . mysql_real_escape_string($re_parent, $con) . "')";

                if (!mysql_query($insert, $con)) {
                    die('Error: ' . mysql_error());
                }
                echo "1 record added";
                echo $id;
                echo $re_name;
                echo $re_parent;
                echo "<br>";
            }
        }
    }
    echo "<br>";
}

mysql_close($con);
?>
爬虫爬最后一级:
<?php
// For every stored region whose id is 9 characters long, fetches its child
// regions from res.42du.cn/region/<id> and inserts them into t_zd_region.
// Failed inserts are counted and skipped rather than aborting the run.
$con = mysql_connect("192.168.1.26:3306", "root", "root");
if (!$con) {
    die('Could not connect: ' . mysql_error());
}
if (!mysql_select_db("fruitdb", $con)) {
    die('Error: ' . mysql_error());
}

// Run the SELECT once and keep its result (it was previously executed twice).
$sql = "select * from t_zd_region where length(id)=9";
$result = mysql_query($sql, $con);
if (!$result) {
    die('Error: ' . mysql_error());
}

// Running count of INSERT statements that failed.
$a = 0;

// Fetch at the top of each iteration: this processes the first row too and
// never runs the body with an exhausted (false) row, which used to request
// the URL with an empty id.
while ($row = mysql_fetch_assoc($result)) {
    // NOTE(review): the key case ('Id') must match the column name exactly as
    // defined in the table, which is not visible here - confirm.
    $url = "http://res.42du.cn/region/" . $row['Id'];
    $content = file_get_contents($url);
    $citys = ($content === false) ? null : json_decode($content, true);

    // A failed fetch or unparsable payload skips this parent region only.
    if (is_array($citys)) {
        // Unlike the other levels, this payload is iterated one level deep:
        // each top-level value is read directly as a region record.
        foreach ($citys as $region) {
            // A record without a region code cannot be stored.
            if (!is_array($region) || !isset($region["re_code"])) {
                continue;
            }

            // Values are converted from UTF-8 to GBK before storage, as in the
            // original script. Missing/null fields are stored as ''.
            $id = iconv("utf-8", "GBK//IGNORE", $region["re_code"]);
            $re_name = iconv("utf-8", "GBK//IGNORE", isset($region["re_name"]) ? $region["re_name"] : '');
            $re_parent = iconv("utf-8", "GBK//IGNORE", isset($region["re_parent"]) ? $region["re_parent"] : '');

            // Escape remote data so quote/backslash bytes in the converted
            // text cannot break or alter the statement.
            $insert = "INSERT INTO t_zd_region (id, re_name, re_parent) VALUES ('"
                . mysql_real_escape_string($id, $con) . "','"
                . mysql_real_escape_string($re_name, $con) . "','"
                . mysql_real_escape_string($re_parent, $con) . "')";

            if (!mysql_query($insert, $con)) {
                // Best-effort: report the running failure count and move on.
                $a++;
                echo "aaa" . $a;
                echo "<br>";
                continue;
            }
            echo "1 record added";
            echo $id;
            echo $re_name;
            echo $re_parent;
            echo "<br>";
        }
    }
    echo "<br>";
}

mysql_close($con);
?>