PHP 爬虫:爬取省、市、县行政区划数据

数据库的表结构:

 

 

 

 

 

 

 

 

 

 

 

 

 

 

PHP 爬虫爬取第一级:

 

<?php
/**
 * Region crawler, first pass.
 *
 * Downloads the JSON document at http://res.42du.cn/region/init, walks its
 * two nesting levels and inserts one row into `t_zd_region` for every entry,
 * using the entry's `re_code`, `re_name` and `re_parent` values.
 *
 * Output: "1 record added<br>" per inserted row. The script stops with an
 * error message on the first connection, download, decode or insert failure.
 *
 * The original used the `mysql_*` extension, which no longer exists in
 * PHP 7+, and interpolated the downloaded values straight into the SQL text.
 * This version uses mysqli with a prepared statement instead.
 */

// Report mysqli failures through return values on every PHP version
// (newer PHP versions throw exceptions by default).
mysqli_report(MYSQLI_REPORT_OFF);

$con = mysqli_connect("127.0.0.1", "root", "root", "fruitdb", 3306);
if (!$con) {
    die('Could not connect: ' . mysqli_connect_error());
}

$url = "http://res.42du.cn/region/init";
$content = file_get_contents($url);
if ($content === false) {
    die('Error: could not download ' . $url);
}

$citys = json_decode($content, true);
if (!is_array($citys)) {
    die('Error: response from ' . $url . ' is not a JSON array/object');
}

// Prepared once, executed per row: the downloaded values are never spliced
// into the SQL text, so quotes or backslash bytes cannot break the statement.
$stmt = mysqli_prepare(
    $con,
    "INSERT INTO t_zd_region (id, re_name, re_parent) VALUES (?, ?, ?)"
);
if (!$stmt) {
    die('Error: ' . mysqli_error($con));
}

foreach ($citys as $group) {
    // Skip top-level members that are not lists of entries.
    if (!is_array($group)) {
        continue;
    }

    foreach ($group as $region) {
        if (!is_array($region)) {
            continue;
        }

        // Values are converted from UTF-8 to GB2312 exactly as the original did.
        // NOTE(review): presumably the table/connection stores GB2312 — confirm.
        $id = iconv("utf-8", "gb2312//IGNORE", $region["re_code"]);
        $re_name = iconv("utf-8", "gb2312//IGNORE", $region["re_name"]);
        $re_parent = iconv("utf-8", "gb2312//IGNORE", $region["re_parent"]);

        mysqli_stmt_bind_param($stmt, 'sss', $id, $re_name, $re_parent);
        if (!mysqli_stmt_execute($stmt)) {
            die('Error: ' . mysqli_stmt_error($stmt));
        }

        echo "1 record added";
        echo "<br>";
    }
}

mysqli_stmt_close($stmt);
mysqli_close($con);
?>

PHP 爬虫爬取第二、三、四级:

<?php
/**
 * Region crawler, second pass.
 *
 * Selects every row of `t_zd_region` whose id is 4 characters long, requests
 * http://res.42du.cn/region/city/<id> for each of them, walks the two nesting
 * levels of the JSON response and inserts one row into `t_zd_region` per
 * entry (`re_code`, `re_name`, `re_parent`).
 *
 * Output: "1 record added" followed by the inserted id, name and parent for
 * each row, and an extra "<br>" after each parent region. An insert failure
 * stops the script, as in the original; a failed download or an undecodable
 * response is reported and the crawl moves on to the next parent.
 *
 * Fixes relative to the original:
 *  - the loop fetched the NEXT row before using the current one, so the first
 *    selected row was never crawled and the last iteration requested a URL
 *    with an empty id;
 *  - the SELECT was executed twice;
 *  - `mysql_*` (gone in PHP 7+) and string-built SQL are replaced by mysqli
 *    with a prepared statement;
 *  - unused variables, an unreachable `continue` after `die()`, and the
 *    commented-out dead code were removed.
 */

// Report mysqli failures through return values on every PHP version
// (newer PHP versions throw exceptions by default).
mysqli_report(MYSQLI_REPORT_OFF);

$con = mysqli_connect("192.168.1.26", "root", "root", "fruitdb", 3306);
if (!$con) {
    die('Could not connect: ' . mysqli_connect_error());
}

// mysqli_query() buffers the whole result set by default, so inserting into
// the same table while iterating over it is safe.
$result = mysqli_query($con, "select * from t_zd_region where length(id)=4");
if (!$result) {
    die('Error: ' . mysqli_error($con));
}

// Prepared once, executed per row: downloaded values never become SQL text.
$stmt = mysqli_prepare(
    $con,
    "INSERT INTO t_zd_region (id, re_name, re_parent) VALUES (?, ?, ?)"
);
if (!$stmt) {
    die('Error: ' . mysqli_error($con));
}

while ($row = mysqli_fetch_assoc($result)) {
    // NOTE(review): array keys are case-sensitive and the original reads 'Id'
    // while the INSERT names the column `id` — confirm the column's real case.
    $url = "http://res.42du.cn/region/city/" . $row['Id'];

    $content = file_get_contents($url);
    $citys = ($content === false) ? null : json_decode($content, true);

    if (!is_array($citys)) {
        // Best effort: report and carry on with the next parent region.
        echo "Error: no usable response from " . $url;
    } else {
        foreach ($citys as $group) {
            if (!is_array($group)) {
                continue;
            }

            foreach ($group as $region) {
                if (!is_array($region)) {
                    continue;
                }

                // Same UTF-8 -> GBK conversion as the original.
                // NOTE(review): presumably the table stores GBK — confirm.
                $id = iconv("utf-8", "GBK//IGNORE", $region["re_code"]);
                $re_name = iconv("utf-8", "GBK//IGNORE", $region["re_name"]);
                $re_parent = iconv("utf-8", "GBK//IGNORE", $region["re_parent"]);

                mysqli_stmt_bind_param($stmt, 'sss', $id, $re_name, $re_parent);
                if (!mysqli_stmt_execute($stmt)) {
                    die('Error: ' . mysqli_stmt_error($stmt));
                }

                echo "1 record added";
                echo $id;
                echo $re_name;
                echo $re_parent;
                echo "<br>";
            }
        }
    }

    echo "<br>";
}

mysqli_stmt_close($stmt);
mysqli_free_result($result);
mysqli_close($con);
?>

PHP 爬虫爬取最后一级:

<?php
/**
 * Region crawler, final pass.
 *
 * Selects every row of `t_zd_region` whose id is 9 characters long, requests
 * http://res.42du.cn/region/<id> for each of them and inserts one row into
 * `t_zd_region` per top-level entry of the JSON response (`re_code`,
 * `re_name`, `re_parent`).
 *
 * Output: "1 record added" followed by the inserted id, name and parent for
 * each row, and an extra "<br>" after each parent region. A failed insert
 * does not stop the script: it prints "aaa<n>" with a running failure count
 * and moves on, as the original did. A failed download or an undecodable
 * response is reported and the crawl moves on to the next parent.
 *
 * Fixes relative to the original:
 *  - the loop fetched the NEXT row before using the current one, so the first
 *    selected row was never crawled and the last iteration requested a URL
 *    with an empty id;
 *  - the SELECT was executed twice;
 *  - `mysql_*` (gone in PHP 7+) and string-built SQL are replaced by mysqli
 *    with a prepared statement;
 *  - unused variables and the commented-out dead code were removed.
 */

// Report mysqli failures through return values on every PHP version
// (newer PHP versions throw exceptions by default); the failure counter
// below relies on execute() returning false.
mysqli_report(MYSQLI_REPORT_OFF);

$con = mysqli_connect("192.168.1.26", "root", "root", "fruitdb", 3306);
if (!$con) {
    die('Could not connect: ' . mysqli_connect_error());
}

// mysqli_query() buffers the whole result set by default, so inserting into
// the same table while iterating over it is safe.
$result = mysqli_query($con, "select * from t_zd_region where length(id)=9");
if (!$result) {
    die('Error: ' . mysqli_error($con));
}

// Prepared once, executed per row: downloaded values never become SQL text.
$stmt = mysqli_prepare(
    $con,
    "INSERT INTO t_zd_region (id, re_name, re_parent) VALUES (?, ?, ?)"
);
if (!$stmt) {
    die('Error: ' . mysqli_error($con));
}

// Number of inserts that failed so far.
$a = 0;

while ($row = mysqli_fetch_assoc($result)) {
    // NOTE(review): array keys are case-sensitive and the original reads 'Id'
    // while the INSERT names the column `id` — confirm the column's real case.
    $url = "http://res.42du.cn/region/" . $row['Id'];

    $content = file_get_contents($url);
    $citys = ($content === false) ? null : json_decode($content, true);

    if (!is_array($citys)) {
        // Best effort: report and carry on with the next parent region.
        echo "Error: no usable response from " . $url;
    } else {
        foreach ($citys as $region) {
            if (!is_array($region)) {
                continue;
            }

            // Same UTF-8 -> GBK conversion as the original.
            // NOTE(review): presumably the table stores GBK — confirm.
            $id = iconv("utf-8", "GBK//IGNORE", $region["re_code"]);
            $re_name = iconv("utf-8", "GBK//IGNORE", $region["re_name"]);
            $re_parent = iconv("utf-8", "GBK//IGNORE", $region["re_parent"]);

            mysqli_stmt_bind_param($stmt, 'sss', $id, $re_name, $re_parent);
            if (!mysqli_stmt_execute($stmt)) {
                // Count the failure and keep going with the next entry.
                $a++;
                echo "aaa" . $a;
                echo "<br>";
                continue;
            }

            echo "1 record added";
            echo $id;
            echo $re_name;
            echo $re_parent;
            echo "<br>";
        }
    }

    echo "<br>";
}

mysqli_stmt_close($stmt);
mysqli_free_result($result);
mysqli_close($con);
?>

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值