最近有个朋友问我一个问题,我还是第一次处理这种问题:带跨行(rowspan)的表格要解析成想要的格式。为了方便朋友理解,将步骤拆得比较细。
总思路:遍历table补全所有缺失的td,然后再进行格式处理
1.分析
目标表格:
解析格式:
[
{"xqj":1,"ksjs":1,"js":3,"kcmc":"语文"},
{"xqj":2,"ksjs":1,"js":3,"kcmc":"数学"}
]
参数说明:
* xqj:星期几
* ksjs:开始节数(注意:下文代码实际输出的键名为 skjc)
* js:节数,连续上的节数(注意:下文代码实际输出的键名为 skcd)
* kcmc:课程名称
我们先分析一下,为什么要先遍历补全,下图是html源码,
我已标注了每一行tr下面的td数量。按道理,如果每一行都是8个td,相信这不是什么问题;但现在每行td数量不确定,我们怎么知道到底是哪个位置少了td?所以我们需要先遍历,把缺失td的地方找出来,并给它补上一个空内容的td,方便后面的遍历取值。不多说了,上代码,大家勿喷。
2.盘他
round1:补全td
// Round 1: load the timetable HTML and rebuild every row so that each row
// has one <td> per visual column. Cells hidden by a rowspan from a row above
// are re-inserted as blank placeholder cells.
//
// Result: $table2[rowKey] = list of <td ...>...</td> strings, where rowKey
// starts at 1 (the header row, key 0, is dropped) and index 0 of each row is
// the period-label cell.
$file = "./kcb_demo.html";
$content = file_get_contents($file);
if ($content === false) {
    // Without the file there is nothing to parse; stop instead of running
    // the regexes below against `false`.
    exit("Unable to read {$file}\n");
}
// Match the timetable <table>. The opening tag must match this attribute
// string exactly.
if (!preg_match('/<table border="1" align="center" cellpadding="0" cellspacing="0">(.*)<\/table>/iUs', $content, $matchs)) {
    exit("Timetable <table> not found in {$file}\n");
}
$content = $matchs[0];
//print_r($content);exit;
// Match every <tr>; capture group 1 is the text between "<tr" and "</tr>".
preg_match_all('/<tr(.*)<\/tr>/iUs', $content, $tr_matchs);
$tr_arr = $tr_matchs[1];
// Drop the first (title) row. The remaining rows keep their keys 1..N,
// which the later column walk relies on.
unset($tr_arr[0]);
// Split each row into its raw <td> strings.
$table = [];
foreach ($tr_arr as $tr_key => $tr) {
    preg_match_all('/<td(.*)<\/td>/iUs', $tr, $td_matchs);
    $table[$tr_key] = $td_matchs[0];
}
// Rebuild the rows in a single top-to-bottom pass.
// $covered[row][col] marks a slot that is occupied by a cell spanning down
// from an earlier row. Every real <td> is placed in the next slot that is
// not covered, so its index in $table2 is its true visual column even when
// several spans of different heights overlap the same row. (Splicing
// placeholders at the parent's index, row by row, misplaces cells when a
// span from two or more rows above is inserted before a nearer span with a
// smaller column index.)
$placeholder = '<td rowspan="1" align="center"> </td>';
$covered = [];
$table2 = [];
foreach ($table as $tr_key => $td_list) {
    $row = [];
    foreach ($td_list as $td) {
        // Fill every covered slot that precedes this real cell.
        while (isset($covered[$tr_key][count($row)])) {
            $row[] = $placeholder;
        }
        $col = count($row);
        $row[] = $td;
        // A cell without a rowspan attribute (e.g. the label cell) spans
        // one row.
        $rowspan = preg_match('/<td rowspan="(.*)"/iUs', $td, $rowspan_match)
            ? (int) $rowspan_match[1]
            : 1;
        // Reserve the same column in the rows this cell extends into. Rows
        // past the end of the table are simply never visited.
        for ($span = 1; $span < $rowspan; $span++) {
            $covered[$tr_key + $span][$col] = true;
        }
    }
    // Covered slots after the last real cell of the row.
    while (isset($covered[$tr_key][count($row)])) {
        $row[] = $placeholder;
    }
    $table2[$tr_key] = $row;
}
// Print the padded table to check that the missing <td> cells were filled in.
print_r($table2);
round2:按列遍历,取最终要的格式
// Round 2: walk the padded table ($table2, built above) column by column
// and collect one entry per course cell, then print the list as JSON.
//
// Each entry: xqj  = weekday column (1..7),
//             skjc = 1-based table row where the course starts,
//             skcd = number of rows the course spans (its rowspan),
//             kcmc = course name.
// NOTE(review): skjc is the table row index, so any non-lesson rows in the
// table are counted too - confirm this is the intended numbering.
$return = [];
// Columns 1..7 are the weekdays; column 0 is the period label.
$xqj_arr = [1, 2, 3, 4, 5, 6, 7];
foreach ($xqj_arr as $xqj) {
    // Row at which the next cell of this column starts.
    $row_pos = 1;
    foreach ($table2 as $tr_key => $tr) {
        // Rows covered by the previous cell's rowspan are skipped.
        if ($tr_key != $row_pos) {
            continue;
        }
        // The cell for this weekday in this row; tolerate a short row.
        $td = isset($tr[$xqj]) ? $tr[$xqj] : '';
        // Read the rowspan as an integer. Default to 1 so that a cell
        // without the attribute still advances $row_pos instead of
        // silently ending the walk of this column.
        $kc_long = preg_match('/<td rowspan="(.*)"/iUs', $td, $rowspan_match)
            ? max(1, (int) $rowspan_match[1])
            : 1;
        // Read the course name. Blank cells contain only whitespace (or a
        // non-breaking space), and names may carry a trailing newline, so
        // normalise before deciding whether the cell holds a course.
        if (preg_match('/>(.*)<\/td>/iUs', $td, $km)) {
            $kcmc = trim(str_replace(['&nbsp;', "\xC2\xA0"], ' ', $km[1]));
            if ($kcmc !== '') {
                $return[] = ["xqj" => $xqj, "skjc" => $row_pos, "skcd" => $kc_long, "kcmc" => $kcmc];
            }
        }
        // Jump straight to the first row after this cell's span.
        $row_pos += $kc_long;
    }
}
echo json_encode($return, JSON_UNESCAPED_UNICODE);
3.源码
课程表html(保存为 ./kcb_demo.html,供下面的php源码读取)
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<table border="1" align="center" cellpadding="0" cellspacing="0">
<tbody>
<tr height=30>
<td width="60" align="center"> </td>
<td width="100" align="center" >星期一<br>2021-11-08</td>
<td width="100" align="center" >星期二<br>2021-11-09</td>
<td width="100" align="center" >星期三<br>2021-11-10</td>
<td width="100" align="center" >星期四<br>2021-11-11</td>
<td width="100" align="center" >星期五<br>2021-11-12</td>
<td width="100" align="center" >星期六<br>2021-11-13</td>
<td width="100" align="center" >星期日<br>2021-11-14</td>
</tr>
<tr>
<td align="center">第1节<br><br></td>
<td rowspan="2" align="center" >语文</td>
<td rowspan="2" align="center" >数学</td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="3" align="center" >政治</td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
</tr>
<tr>
<td align="center">第2节<br><br></td>
<td rowspan="3" align="center" >英语</td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
</tr>
<tr>
<td align="center">第3节<br><br></td>
<td rowspan="2" align="center" >体育</td>
<td rowspan="2" align="center" >科学</td>
<td rowspan="2" align="center" >美术</td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
</tr>
<tr>
<td align="center">第4节<br><br></td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
</tr>
<tr>
<td align="center">中午1<br><br></td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
</tr>
<tr>
<td align="center">中午2<br><br></td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
</tr>
<tr>
<td align="center">第5节<br><br></td>
<td rowspan="2" align="center" >数学</td>
<td rowspan="2" align="center" >语文</td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="2" align="center" >体育</td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
</tr>
<tr>
<td align="center">第6节<br><br></td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
</tr>
<tr>
<td align="center">第7节<br><br></td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
</tr>
<tr>
<td align="center">第8节<br><br></td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
</tr>
<tr>
<td align="center">第9节<br><br></td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="2" align="center" >政治
</td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
</tr>
<tr>
<td align="center" >第10节<br><br></td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
</tr>
<tr>
<td align="center" >第11节<br><br></td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
<td rowspan="1" align="center" > </td>
</tr>
<tr>
<td align="center">第12节<br><br></td>
<td rowspan="1" align="center"> </td>
<td rowspan="1" align="center"> </td>
<td rowspan="1" align="center"> </td>
<td rowspan="1" align="center"> </td>
<td rowspan="1" align="center"> </td>
<td rowspan="1" align="center"> </td>
<td rowspan="1" align="center"> </td>
</tr>
</tbody>
</table>
php源码
<?php
/*
 * sanmo.zheng
 * Timetable parser.
 * 1. Walk the table and fill in every <td> that is missing because a cell
 *    in a row above spans down over it (rowspan); then
 * 2. walk the padded table column by column and emit the courses as JSON.
 *
 * Output format (keys as emitted by this script):
 * [
 *   {"xqj":1,"skjc":1,"skcd":2,"kcmc":"语文"},
 *   {"xqj":2,"skjc":1,"skcd":2,"kcmc":"数学"}
 * ]
 * Fields:
 *   xqj:  weekday column (1..7)
 *   skjc: 1-based table row where the course starts
 *   skcd: number of consecutive rows the course spans
 *   kcmc: course name
 */
$file = "./kcb_demo.html";
$content = file_get_contents($file);
if ($content === false) {
    // Without the file there is nothing to parse; stop instead of running
    // the regexes below against `false`.
    exit("Unable to read {$file}\n");
}
// Match the timetable <table>. The opening tag must match this attribute
// string exactly.
if (!preg_match('/<table border="1" align="center" cellpadding="0" cellspacing="0">(.*)<\/table>/iUs', $content, $matchs)) {
    exit("Timetable <table> not found in {$file}\n");
}
$content = $matchs[0];
//print_r($content);exit;
// Match every <tr>; capture group 1 is the text between "<tr" and "</tr>".
preg_match_all('/<tr(.*)<\/tr>/iUs', $content, $tr_matchs);
$tr_arr = $tr_matchs[1];
// Drop the first (title) row. The remaining rows keep their keys 1..N,
// which the column walk below relies on ($row_pos starts at 1).
unset($tr_arr[0]);
// Split each row into its raw <td> strings.
$table = [];
foreach ($tr_arr as $tr_key => $tr) {
    preg_match_all('/<td(.*)<\/td>/iUs', $tr, $td_matchs);
    $table[$tr_key] = $td_matchs[0];
}
// Rebuild the rows in a single top-to-bottom pass.
// $covered[row][col] marks a slot that is occupied by a cell spanning down
// from an earlier row. Every real <td> is placed in the next slot that is
// not covered, so its index in $table2 is its true visual column even when
// several spans of different heights overlap the same row. (Splicing
// placeholders at the parent's index, row by row, misplaces cells when a
// span from two or more rows above is inserted before a nearer span with a
// smaller column index.)
$placeholder = '<td rowspan="1" align="center"> </td>';
$covered = [];
$table2 = [];
foreach ($table as $tr_key => $td_list) {
    $row = [];
    foreach ($td_list as $td) {
        // Fill every covered slot that precedes this real cell.
        while (isset($covered[$tr_key][count($row)])) {
            $row[] = $placeholder;
        }
        $col = count($row);
        $row[] = $td;
        // A cell without a rowspan attribute (e.g. the label cell) spans
        // one row.
        $rowspan = preg_match('/<td rowspan="(.*)"/iUs', $td, $rowspan_match)
            ? (int) $rowspan_match[1]
            : 1;
        // Reserve the same column in the rows this cell extends into. Rows
        // past the end of the table are simply never visited.
        for ($span = 1; $span < $rowspan; $span++) {
            $covered[$tr_key + $span][$col] = true;
        }
    }
    // Covered slots after the last real cell of the row.
    while (isset($covered[$tr_key][count($row)])) {
        $row[] = $placeholder;
    }
    $table2[$tr_key] = $row;
}
// Walk the padded table column by column.
$return = [];
// Columns 1..7 are the weekdays; column 0 is the period label.
$xqj_arr = [1, 2, 3, 4, 5, 6, 7];
foreach ($xqj_arr as $xqj) {
    // Row at which the next cell of this column starts.
    $row_pos = 1;
    foreach ($table2 as $tr_key => $tr) {
        // Rows covered by the previous cell's rowspan are skipped.
        if ($tr_key != $row_pos) {
            continue;
        }
        // The cell for this weekday in this row; tolerate a short row.
        $td = isset($tr[$xqj]) ? $tr[$xqj] : '';
        // Read the rowspan as an integer. Default to 1 so that a cell
        // without the attribute still advances $row_pos instead of
        // silently ending the walk of this column.
        $kc_long = preg_match('/<td rowspan="(.*)"/iUs', $td, $rowspan_match)
            ? max(1, (int) $rowspan_match[1])
            : 1;
        // Read the course name. Blank cells contain only whitespace (or a
        // non-breaking space), and names may carry a trailing newline, so
        // normalise before deciding whether the cell holds a course.
        if (preg_match('/>(.*)<\/td>/iUs', $td, $km)) {
            $kcmc = trim(str_replace(['&nbsp;', "\xC2\xA0"], ' ', $km[1]));
            if ($kcmc !== '') {
                $return[] = ["xqj" => $xqj, "skjc" => $row_pos, "skcd" => $kc_long, "kcmc" => $kcmc];
            }
        }
        // Jump straight to the first row after this cell's span.
        $row_pos += $kc_long;
    }
}
echo json_encode($return, JSON_UNESCAPED_UNICODE);