利用 jQuery 强大的 DOM 操纵能力来采集页面数据,
然后组织数据以 POST 的方式发送给自身,
自身接收 POST 来的数据再以 CSV 格式写入到文件。
// The crawl walks many pages in sequence, so lift PHP's execution time limit.
set_time_limit(0);

// Values appended to $base as the remote "startcount": 0, 100, ..., 49100.
$num = range(0, 49100, 100);
$base = 'http://www.zjchina.org/mspMajorIndexAction.fo?&startcount=';

// $page is an index into $num, taken from this script's own "startcount"
// query parameter (0 when absent). The parameter is untrusted: anything that
// is not a plain run of decimal digits (non-numeric text, an array, a negative
// number) is mapped to -1. That is never a valid index, so the guard below
// ends the run instead of the later "+ 1" raising a TypeError on PHP 8.
$page = 0;
if ( isset($_GET['startcount']) ) {
    $raw = $_GET['startcount'];
    $page = ( is_string($raw) && preg_match('/\A[0-9]+\z/', $raw) === 1 ) ? (int) $raw : -1;
}

// Stop once the index is past the last entry of $num (or was invalid).
if ( !isset($num[$page]) ) { exit('采集完了'); }

// Address of this script for the following index; computed only after $page
// is known to be a valid integer.
$next_url = $_SERVER['SCRIPT_NAME'].'?startcount='.($page+1);
// Save step: handles the POST that carries the collected rows in "send",
// appends them to data.csv next to this script, and answers with JSON.
// isset() alone is sufficient here: if $_POST['send'] exists, $_POST is
// necessarily non-empty.
if ( isset($_POST['send']) ) {
    if ( !headers_sent() ) { header('Content-Type: application/json; charset=utf-8'); }

    $send = $_POST['send'];
    // "send" is client-supplied. Anything other than a list of rows is
    // rejected without a "jump" key, so the caller does not move on to the
    // next page as if the data had been stored.
    if ( !is_array($send) ) {
        exit(json_encode(array('error' => 'invalid send payload')));
    }

    $file = dirname(__FILE__).'/data.csv';
    // Once the file exists, the row at index 0 is dropped from every batch
    // (presumably the table's header row, so it is written only once).
    if ( file_exists($file) ) { unset($send[0]); }

    $fp = fopen($file, 'a+');
    if ( $fp === false ) {
        // Report the failure instead of writing to an invalid handle and
        // then telling the caller to continue.
        exit(json_encode(array('error' => 'cannot open data.csv')));
    }
    foreach ($send as $line) {
        // fputcsv() needs an array of fields; skip malformed rows.
        if ( !is_array($line) ) { continue; }
        // Delimiter, enclosure and escape are the function's historical
        // defaults, spelled out so PHP 8.4+ does not emit a deprecation
        // notice into the JSON response.
        fputcsv($fp, $line, ',', '"', '\\');
    }
    fclose($fp);

    // Tell the caller which URL to load next.
    exit(json_encode(array('jump' => $next_url)));
}
// Fetch step: download the remote page for the current entry of $num.
$html = file_get_contents($base.$num[$page]);
if ( $html === false ) {
    // file_get_contents() returns false on failure. Without this check the
    // failure would be turned into an empty page with the collector code
    // appended, and the crawl would stall without any visible reason.
    exit('抓取失败: '.$base.$num[$page]);
}

// Replace every occurrence of the text "script" with "pre" in the fetched
// markup (presumably so the remote page's own scripts do not run when the
// page is re-served from here). This is a plain text replacement, so it also
// rewrites the word wherever else it occurs in the page.
$html = str_replace('script', 'pre', $html);

// Append the browser-side collector: it reads the text of every <td> in the
// rows of "#A5 table", posts the rows back to the current URL as "send", and
// follows the "jump" URL from the JSON reply.
// NOTE(review): as visible here the code is appended as bare text, with no
// <script> wrapper and no jQuery include. Those tags may have been lost from
// this copy of the file; confirm against the deployed version.
$html .= '
$(function(){
var data = [];
var url = window.location.href;
var $tr = $("#A5 table tr");
$tr.each(function(){
var tds = [];
$(this).children("td").each(function(){
tds.push($(this).text());
});
data.push(tds);
});
$.post(url, { send: data }, function(ret){
if ( ret.jump ) {
window.location.href = ret.jump;
}
}, "json");
});
';
echo $html;