/**
 * Crawl the itjuzi.com investment-firm listing and store each entry in MySQL.
 *
 * Walks listing pages 313 down to 1, extracts the <li> fields of every
 * "detail-info" block on a page, and inserts them as one row into the `crawl`
 * table of the `crawl` database. Progress and the generated SQL are echoed as
 * HTML fragments.
 *
 * This function never returns to its caller: it terminates the script once all
 * pages are processed, when a page cannot be fetched or yields no entries, or
 * when the database connection/selection fails.
 *
 * @return void
 */
function site() {
    ini_set('memory_limit', '512M');
    ini_set('max_execution_time', 360);

    $connect = mysql_connect('localhost', 'root', '123456') or die('Not connected : ' . mysql_error());
    // Make `crawl` the current database.
    mysql_select_db('crawl', $connect) or die('Can\'t use crawl : ' . mysql_error());
    // The client charset is a per-connection setting, so set it once up front
    // instead of re-sending it before every insert.
    mysql_query('set names utf8', $connect);

    $site_url = 'http://itjuzi.com/investfirm';
    // Target columns, in the order the <li> items are read from each block.
    $columns = array('name', 'site', 'stage', 'scopes', 'description');
    $column_count = count($columns);

    for ($i = 313; $i > 0; $i--) {
        // Pause on every tenth page (presumably to throttle requests to the remote site).
        if ($i % 10 == 0) {
            sleep(5);
        }
        echo 'page = ' . $i . '<br/>';

        $data = file_get_contents($site_url . '?page=' . $i);
        if ($data === false) {
            // Distinguish a failed download from a page that simply has no entries.
            echo 'current page. ' . $i . ' , fetch failed';
            exit();
        }

        preg_match_all('/(detail-info\"\>[(\s\S)]*<li>)(.*)(<\/li>[(\s\S)]*<\/ul>)/Uis', $data, $matches, PREG_PATTERN_ORDER);
        if (!isset($matches[0]) || !$matches[0]) {
            echo 'current page. ' . $i . ' , finished';
            exit();
        }

        $result = null;
        foreach ($matches[0] as $val) {
            // Strip the wrapper markers so only the <li> items remain.
            $val = str_replace('detail-info">', '', $val);
            $val = str_replace('</ul>', '', $val);
            preg_match_all('/<li>(.*)<\/li>/iUs', $val, $_matches, PREG_PATTERN_ORDER);

            $fields = array();
            foreach ($_matches[1] as $items) {
                // Drop the markup and the leading field labels, keeping only the values.
                $fields[] = preg_replace('/名称: |网址: |阶段: |领域: |介绍: /', '', strip_tags($items));
            }
            // Keep exactly one value per column: missing items become empty
            // strings (no undefined-offset notices), extra items are ignored.
            $fields = array_slice(array_pad($fields, $column_count, ''), 0, $column_count);

            // The values come from a remote page, so escape them before they
            // are embedded in the statement.
            $escaped = array();
            foreach ($fields as $field) {
                $escaped[] = mysql_real_escape_string($field, $connect);
            }

            $sql = 'insert into `crawl` (`' . implode('`,`', $columns) . '`) VALUES (\'' . implode('\',\'', $escaped) . '\')';
            $result = mysql_query($sql, $connect);
            if ($result === false) {
                echo 'insert failed : ' . mysql_error($connect) . '<br/>';
            }
            echo $sql;
            echo '<br/>';
        }
        // Result of the last insert on this page.
        var_dump($result);
    }
    exit();
}