<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>alibaba采集</title>
<?php
// Lift PHP's per-request execution time limit (0 means "no limit").
// Presumably needed because the script below performs many sequential
// remote fetches per page view -- confirm before tightening.
set_time_limit(0);
/**
 * Build a random 26-character token made of digits and lowercase ASCII letters.
 *
 * The generator is mt_rand(), which PHP seeds automatically; it is NOT
 * cryptographically secure, so do not use the result as a secret.
 *
 * @return string 26 characters drawn from [0-9a-z].
 */
function _rand() {
    $length = 26;
    $chars = "0123456789abcdefghijklmnopqrstuvwxyz";
    $max = strlen($chars) - 1;
    // No mt_srand() here on purpose: re-seeding from the microsecond clock on
    // every call made two calls within the same microsecond return the same
    // string, and mt_rand() already seeds itself.
    $string = '';
    for ($i = 0; $i < $length; $i++) {
        $string .= $chars[mt_rand(0, $max)];
    }
    return $string;
}
// ---------------------------------------------------------------------------
// Scrape one Alibaba company-listing page, store each company's contact
// details in the `alibaba` table, then redirect the browser to the next page.
//
// Input : $_GET['page'] -- 1-based listing page number (defaults to 1).
// Output: a <meta refresh> to alibaba.php?page=N+1, or "OVER!" once the last
//         page has been processed.
// ---------------------------------------------------------------------------

// Errors are deliberately hidden from the page output; failures that matter
// are reported through error_log() below instead.
error_reporting(0);
ini_set('html_errors', false);
ini_set('display_errors', false);

/**
 * Download a URL and return its body.
 *
 * @param string $url Absolute URL to fetch.
 * @return string|false Response body, or false when the transfer failed.
 */
function _fetch_html($url) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)");
    // The script runs with no PHP time limit, so bound each transfer here;
    // otherwise one stalled connection would hang the whole crawl.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);
    $body = curl_exec($ch);
    curl_close($ch);
    return $body;
}

/**
 * Pull one labelled value out of a contact page.
 *
 * @param string $html    Page markup to search.
 * @param string $pattern PCRE pattern whose first capture group is the value.
 * @return string The captured text with tags stripped and whitespace trimmed,
 *                or '' when the pattern does not match.
 */
function _extract_field($html, $pattern) {
    if (!preg_match($pattern, $html, $m)) {
        return '';
    }
    return trim(strip_tags($m[1]));
}

// NOTE(review): credentials are hard-coded (root/password); move them to
// configuration outside the web root.
try {
    $db = new PDO(
        'mysql:host=localhost;dbname=company;charset=utf8',
        'root',
        'password',
        array(
            PDO::ATTR_PERSISTENT => true,
            PDO::ATTR_ERRMODE    => PDO::ERRMODE_EXCEPTION,
        )
    );
} catch (PDOException $e) {
    die("Could not connect" . $e->getMessage());
}

// Force the page number to a positive integer: it is user input that ends up
// both in the fetched URL and in the HTML echoed at the bottom.
$page = isset($_GET['page']) ? (int) $_GET['page'] : 1;
if ($page < 1) {
    $page = 1;
}

// NOTE(review): the run of dashes in this URL looks like ASCII hyphens that a
// typographic filter turned into em/en dashes. It is kept byte-for-byte here
// because the original hyphen count cannot be recovered -- verify against the
// live site.
$listing = _fetch_html("http://www.alibaba.com/corporations/jiangmen/CN——————————–/" . $page . ".html");

// Collect the "<scheme>://<company>" prefix of every *.en.alibaba.com link.
$hosts = array();
if ($listing !== false) {
    preg_match_all('/href\s*=\s*["\']?([^\s"\'>]*)\.en\.alibaba\.com"/i', $listing, $links);
    // A listing usually links to the same company more than once; fetch and
    // store each company only once per page.
    $hosts = array_unique($links[1]);
} else {
    error_log("alibaba scraper: failed to fetch listing page " . $page);
}

// One pattern per column, in the same order as the INSERT column list.
$field_patterns = array(
    '/contactName(.*?)<\/a>/s',        // Name
    '/Company Name:(.*?)<\/td>/s',     // Company
    '/Street Address:(.*?)<\/td>/s',   // Address
    '/City:(.*?)<\/td>/s',             // City
    '/Province\/State:(.*?)<\/td>/s',  // Province
    '/Country\/Region:(.*?)<\/td>/s',  // Region
    '/Zip:(.*?)<\/td>/s',              // Zip
    '/Telephone:(.*?)<\/td>/s',        // Tel
    '/Mobile Phone:(.*?)<\/td>/s',     // Phone
    '/Fax:(.*?)<\/td>/s',              // Fax
    '/Website:(.*?)<\/td>/s',          // Web
);

// Scraped text is untrusted, so it is bound as parameters rather than being
// concatenated into the SQL string.
$insert = $db->prepare(
    "INSERT INTO alibaba (Name, Company, Address, City, Province, Region, Zip, Tel, Phone, Fax, Web)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
);

foreach ($hosts as $host) {
    $contact = _fetch_html($host . ".en.alibaba.com/contactinfo.html");
    if ($contact === false) {
        error_log("alibaba scraper: failed to fetch contact page for " . $host);
        continue;
    }

    $row = array();
    foreach ($field_patterns as $pattern) {
        // Values are stored HTML-escaped, as before, so existing consumers of
        // the table keep seeing the same format.
        $row[] = htmlspecialchars(_extract_field($contact, $pattern));
    }

    try {
        $insert->execute($row);
    } catch (PDOException $e) {
        // Best effort: one bad row must not abort the rest of the page.
        error_log("alibaba scraper: insert failed for " . $host . ": " . $e->getMessage());
    }
}

// 29 is the hard-coded number of the last listing page to crawl.
if ($page >= 29) {
    echo "OVER!";
    exit();
} else {
    echo "<meta http-equiv=refresh content='0; url=alibaba.php?page=" . ++$page . "'>";
}
?>
</head>
<body>
</body></html>