需求:采集携程网酒店信息
步骤:
- 使用火车头采集器编写采集规则,并将采集结果保存到 txt 文件
- 编写 PHP 脚本读取该 txt 文件,按规则解析后保存为 csv 文件
代码:
<?php
/**
 * Read the scraped records from the txt file and store them in a CSV table.
 *
 * Every line of the input is split into fields by filter_my(); the collected
 * rows are then written out by put_csv_my().
 */
$file_name = 'hotel_2018-3-8.txt';
$file = fopen($file_name, 'r');
if ($file === false) {
    // feof()/fgets() cannot work on an invalid handle, so stop right away
    // with a clear message instead of a cascade of warnings.
    throw new RuntimeException('Cannot open ' . $file_name . ' for reading');
}
$data = [];
// Loop on the result of fgets() rather than on feof(): feof() only turns true
// after a read has hit the end, so the old form handed the final `false`
// to filter_my() and produced one extra, empty record.
while (($str = fgets($file)) !== false) {
    // fgets() keeps the line terminator; drop it so it cannot end up in the last field.
    $arr_line = filter_my(rtrim($str, "\r\n"));
    if ($arr_line === ['']) {
        // Nothing left after filtering (blank line): do not export an empty row.
        continue;
    }
    $data[] = $arr_line;
}
fclose($file);
put_csv_my($data);
echo 'OK';
/**
 * Clean one raw scraped line and split it into its fields.
 *
 * Removes every ASCII space and the literal markers "!!" and "[]", strips a
 * trailing line terminator, and splits what is left on the ",," separator.
 *
 * @param string $str Raw line, e.g. as returned by fgets().
 * @return array List of field strings; input without ",," yields a single element.
 */
function filter_my($str){
    // The array form applies the replacements in order: spaces, then "!!", then "[]".
    // None of these removals can introduce a new space, so one pass is enough.
    $str = str_replace([' ', '!!', '[]'], '', (string) $str);
    // fgets() keeps "\n" / "\r\n"; left in place it would become part of the
    // last field and produce a multi-line cell in the CSV.
    $str = rtrim($str, "\r\n");
    return explode(',,', $str);
}
/**
 * Write the rows to a CSV file, converting all text from UTF-8 to the target charset.
 *
 * The first line written is the fixed five-column header; each following line
 * holds the first five fields of one row. Fields missing from a row are written
 * as empty cells.
 *
 * @param array  $dataList  List of rows; each row is a list of UTF-8 strings in header order.
 * @param string $file_name Path of the CSV file to create or overwrite.
 * @param string $encoding  iconv name of the output charset. Characters it cannot
 *                          represent are dropped (//IGNORE), so a wider charset
 *                          such as 'GBK' keeps more characters than the default.
 * @return void
 * @throws RuntimeException If the output file cannot be opened for writing.
 */
function put_csv_my($dataList, $file_name = 'hotel_2018-3-8.csv', $encoding = 'GB2312'){
    $fp = fopen($file_name, 'w');
    if ($fp === false) {
        throw new RuntimeException('Cannot open ' . $file_name . ' for writing');
    }

    $target = $encoding . '//IGNORE';
    // Single place for the charset conversion used by the header and every cell.
    $convert = function ($text) use ($target) {
        $converted = iconv('UTF-8', $target, (string) $text);
        // iconv() returns false on failure; write an empty cell rather than the boolean.
        return $converted === false ? '' : $converted;
    };

    fputcsv($fp, array_map($convert, ['酒店名称', '地址', '房间数', '开业时间', '联系电话']));

    foreach ($dataList as $data) {
        $row = [];
        // Always emit exactly five columns so every line matches the header.
        for ($i = 0; $i < 5; $i++) {
            $row[] = $convert(isset($data[$i]) ? $data[$i] : '');
        }
        fputcsv($fp, $row);
    }
    fclose($fp);
}