问题描述:
对应HTML代码如下:
<table style="table-layout:fixed; width:100%; border-collapse:collapse; border-spacing:0;">
<colgroup>
<col width="16%">
<col width="14%">
<col width="14%">
<col width="14%">
<col width="14%">
<col width="14%">
<col width="14%">
</colgroup>
<thead>
<tr>
<th style="margin:0; padding:0; height:45px; font-weight:bold; font-size:12px; text-align:center; border-bottom:1px solid #ddd;" colspan="7">详细尺码信息</th>
</tr>
</thead>
<tbody>
<tr>
<td style="margin:0; padding:10px 0; height:30px; text-align:left; font-size:13px;">面料</td>
<td style="margin:0; padding:10px 0; height:30px; text-align:left; font-size:13px;" colspan="6">上衣-棉85/人造丝10/弹力5 裙-棉95/弹力5</td>
</tr>
<tr>
<td style="margin:0; padding:10px 0; height:30px; text-align:left; font-size:13px;">尺码</td>
<td style="margin:0; padding:10px 0; height:30px; text-align:left; font-size:13px;" colspan="6">s,m</td>
</tr>
<tr>
<td style="margin:0; padding:10px 0; height:30px; text-align:left; font-size:13px;">颜色</td>
<td style="margin:0; padding:10px 0; height:30px; text-align:left; font-size:13px;" colspan="6">白色</td>
</tr>
<tr>
<td style="margin:0; padding:10px 0; height:30px; text-align:left; font-size:13px;">洗涤方式</td>
<td style="margin:0; padding:10px 0; height:30px; text-align:left; font-size:13px;" colspan="6">干洗/手洗</td>
</tr>
<tr>
<td style="margin:0; padding:10px 0; height:30px; text-align:left; font-size:13px;">备注</td>
<td style="margin:0; padding:10px 0; height:30px; text-align:left; font-size:13px;" colspan="6">
<p style="margin:0; padding:0; line-height:28px; font-size:13px;">*由于测量方法不同,尺寸数据可能存在误差</p>
<p style="margin:0; padding:0; line-height:28px; font-size:13px;">*由于电脑显示器的分辨率不同,颜色可能出现一些不同的现象</p>
<p> </p>
</td>
</tr>
</tbody>
<tfoot>
<tr>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd; font-weight:bold; border-color:#333;">尺码(cm)</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd; font-weight:bold; border-color:#333;">肩宽</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd; font-weight:bold; border-color:#333;">袖长</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd; font-weight:bold; border-color:#333;">胸宽</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd; font-weight:bold; border-color:#333;">腰宽</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd; font-weight:bold; border-color:#333;">全长</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd; font-weight:bold; border-color:#333;">-</td>
</tr>
<tr>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">上衣</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">41</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">20</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">47</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">-</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">62</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">-</td>
</tr>
<tr>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd; font-weight:bold; border-color:#333;">下衣尺码(cm)</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd; font-weight:bold; border-color:#333;">腰宽</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd; font-weight:bold; border-color:#333;">臀宽</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd; font-weight:bold; border-color:#333;">底宽</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd; font-weight:bold; border-color:#333;">内长度</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd; font-weight:bold; border-color:#333;">全长</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd; font-weight:bold; border-color:#333;">全长</td>
</tr>
<tr>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">M</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">34</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">38</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">-</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">-</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">41-51</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">-</td>
</tr>
<tr>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">L</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">36</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">40</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">-</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">-</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">42-52</td>
<td style="margin:0; padding:10px 0; height:30px; font-size:12px; font-weight:400; text-align:left; border-top:1px solid #ddd;">-</td>
</tr>
</tfoot>
</table>
现需将该尺码表整理成类似如下数据格式:
Material:Cotton
Size:上衣/M/L
上衣:waist:32 hip:42 length:42.5
M:waist:34 hip:44 length:43
L:waist:34 hip:44 length:43
解决方案:
<?php
/**
 * Extracts the material description and the size chart from a product-detail HTML table.
 *
 * The first <tbody> is searched for the cell that follows the "面料" label; that cell is
 * the material. The first <tfoot> is the size chart: a row whose markup contains
 * "font-weight:bold;" is a header row, and every following row up to the next header row
 * is a data row described by that header. Rows that appear before the first header row
 * are ignored.
 *
 * Chinese terms are translated through PrdDtiLngManager::loadKeyByPrdDtiLngNmCns().
 * NOTE(review): that manager is not visible here; the code assumes it returns rows keyed
 * by the Chinese term, each carrying a 'prd_dti_lng_nm_en' field. Confirm against the manager.
 *
 * @param array $prd_dti Product-detail record; only 'prd_dti_bdy_htm' (HTML string) is read.
 *
 * @return array Array with three keys:
 *               - 'dti_size'  list of strings, one per data row, formatted as
 *                             "<first cell>:<header>:<value> <header>:<value> "
 *                             (each pair is followed by a single space);
 *               - 'material'  material text with known Chinese terms replaced, or '' when
 *                             the "面料" row is missing;
 *               - 'simp_size' first cells of all data rows joined with '/'.
 */
private function getPrdDtiSize($prd_dti)
{
    $table_size_list = array(
        'dti_size'  => array(),
        'material'  => '',
        'simp_size' => '',
    );

    $html = isset($prd_dti['prd_dti_bdy_htm']) ? (string)$prd_dti['prd_dti_bdy_htm'] : '';
    if ($html === '') {
        return $table_size_list;
    }

    $prdDtiLngMNG = Cl::get('PrdDtiLngManager');

    // strip_tags() leaves entities untouched, so both the "&nbsp;" entity and the raw
    // UTF-8 non-breaking space (U+00A0) have to be handled explicitly.
    $nbsp       = array('&nbsp;', "\xC2\xA0");
    $td_pattern = "@<td[^>]*?>[\s\S]*?<\/td>@i";

    /* ---------------------------------------------------------------- material */
    if (preg_match("@<tbody[^>]*?>[\s\S]*?<\/tbody>@i", $html, $size_tbody)) {
        preg_match_all($td_pattern, $size_tbody[0], $tmp_tbody_datas);

        $tbody_cells = array();
        foreach ($tmp_tbody_datas[0] as $cell_html) {
            $tbody_cells[] = str_replace($nbsp, '', trim((string)strip_tags($cell_html)));
        }

        // Strict search: a missing label must not be confused with index 0.
        $material_key = array_search('面料', $tbody_cells, true);
        if ($material_key !== false && isset($tbody_cells[$material_key + 1])) {
            $material = $tbody_cells[$material_key + 1];

            // Collect the runs of CJK ideographs; those are the terms to translate.
            preg_match_all('/[\x{4e00}-\x{9fa5}]+/u', $material, $tmp_material_cns);
            if (!empty($tmp_material_cns[0])) {
                $tmp_material_ens = $prdDtiLngMNG->loadKeyByPrdDtiLngNmCns($tmp_material_cns[0], 'prd_dti_lng_nm_cn');
                if (!empty($tmp_material_ens) && is_array($tmp_material_ens)) {
                    $material_ens = array();
                    foreach ($tmp_material_ens as $tmp_material_en) {
                        $material_ens[] = $tmp_material_en['prd_dti_lng_nm_en'];
                    }
                    $material = str_replace(array_keys($tmp_material_ens), $material_ens, $material);
                }
            }

            $table_size_list['material'] = $material;
        }
    }

    /* -------------------------------------------------------------- size chart */
    if (!preg_match("@<tfoot[^>]*?>[\s\S]*?<\/tfoot>@i", $html, $size_tfoot)) {
        return $table_size_list;
    }
    preg_match_all("@<tr[^>]*?>[\s\S]*?<\/tr>@i", $size_tfoot[0], $tmp_tfoot_lines);

    // Parse the cells row by row instead of chunking a flat cell list by an average
    // row width, so a row with a different number of cells cannot shift its neighbours.
    $rows      = array();
    $is_header = array();
    $all_cells = array();
    foreach ($tmp_tfoot_lines[0] as $line_html) {
        preg_match_all($td_pattern, $line_html, $line_cells);

        $cells = array();
        foreach ($line_cells[0] as $cell_html) {
            $text        = str_replace($nbsp, ' ', trim((string)strip_tags($cell_html)));
            $cells[]     = $text;
            $all_cells[] = $text;
        }

        $rows[]      = $cells;
        $is_header[] = (stripos($line_html, 'font-weight:bold;') !== false);
    }

    // One lookup for every cell text of the chart, then translate in place.
    $prd_dti_lngs = array();
    if (!empty($all_cells)) {
        $prd_dti_lngs = $prdDtiLngMNG->loadKeyByPrdDtiLngNmCns($all_cells, 'prd_dti_lng_nm_cn');
        if (!is_array($prd_dti_lngs)) {
            $prd_dti_lngs = array();
        }
    }
    foreach ($rows as $r => $cells) {
        foreach ($cells as $c => $text) {
            if (isset($prd_dti_lngs[$text])) {
                $rows[$r][$c] = $prd_dti_lngs[$text]['prd_dti_lng_nm_en'];
            }
        }
    }

    // Pair every data row with the most recent header row, column by column.
    // Pairing by position (rather than array_combine) keeps columns whose header text
    // is repeated and does not renumber numeric header labels.
    $header     = null;
    $dti_size   = array();
    $size_names = array();
    foreach ($rows as $r => $cells) {
        if ($is_header[$r]) {
            $header = $cells;
            continue;
        }
        // Skip rows without a header, empty rows, and rows whose width does not match
        // the header: their values could not be labelled reliably.
        if ($header === null || empty($cells) || count($cells) !== count($header)) {
            continue;
        }

        $tmp_str = $cells[0] . ':';
        $columns = count($cells);
        for ($i = 1; $i < $columns; $i++) {
            $tmp_str .= $header[$i] . ':' . $cells[$i] . ' ';
        }

        $dti_size[]   = $tmp_str;
        $size_names[] = $cells[0];
    }

    $table_size_list['dti_size']  = $dti_size;
    $table_size_list['simp_size'] = implode('/', $size_names); // e.g. "S/M"

    return $table_size_list;
}
?>
运行结果如下:
Array
(
[dti_size] => Array
(
[0] => jupe:shoulder :41 sleeve:20 chest:47 腰宽:- 全长:62 -:-
[1] => M:腰宽:34 臀宽:38 底宽:- 内长度:- 全长:-
[2] => L:腰宽:36 臀宽:40 底宽:- 内长度:- 全长:-
)
[material] => jupe-棉85/人造丝10/弹力5 裙-棉95/弹力5
[simp_size] => jupe/M/L
)