老师给了一个会议论文统计的 Excel 文件,其中统计了总共提交的将近 200 篇论文的 ID、最终题目和作者,任务是把每一篇论文的联系 email 填充上去。手动填充费时易错,于是考虑写程序来做这件事,有以下三种方案:
- 使用 PHP 的 PEAR 库直接读写 Excel 文件。需要安装 PEAR,可能还需要做些配置工作,额外工程量太大;
- 把 Excel 文件另存为 CSV 文件。缺点是太不直观;
- 把 Excel 文件另存为 HTML 文件,直接解析 HTML。这是本文采用的方案。
下面是用php解析html文件并填充论文email的php代码:
<?php
/**
 * Append a contact-email column to every row of an HTML table of papers.
 *
 * The input is an HTML file whose rows (<tr>) carry the paper id in their
 * first cell. For each row a new <td> is appended that holds the value of
 * Paper.emailContact for that id, or a single space when the first cell is
 * not a positive number or no email is found. The resulting document is
 * written to standard output.
 *
 * Usage: php fill_email.php [path/to/papers.html]
 */

// The legacy mysql_* API no longer exists since PHP 7.0; mysqli has been
// available since PHP 5 and supports prepared statements.
$link = mysqli_connect('server', 'user', 'password');
if ($link === false) {
    die(mysqli_connect_error());
}
if (mysqli_select_db($link, 'database') === false) {
    die(mysqli_error($link));
}

/**
 * Look up the contact email of a paper.
 *
 * Uses the global mysqli connection $link.
 *
 * @param mixed $paper_id Paper id; must satisfy is_numeric().
 *
 * @return string|null The emailContact value, or null when the id is not
 *                     numeric, the query fails, or no row matches.
 */
function getEmail($paper_id)
{
    global $link;

    if (!is_numeric($paper_id)) {
        trigger_error('paper id is not numeric');
        return null;
    }

    // Bind the id instead of interpolating it into the SQL text.
    $stmt = mysqli_prepare($link, 'select emailContact from Paper where id = ?');
    if ($stmt === false) {
        trigger_error(mysqli_error($link));
        return null;
    }

    // Bound as a string to keep the original comparison (id = '<value>').
    $id = (string) $paper_id;
    $email = null;
    $found = false;
    if (mysqli_stmt_bind_param($stmt, 's', $id)
        && mysqli_stmt_execute($stmt)
        && mysqli_stmt_bind_result($stmt, $email)
    ) {
        // mysqli_stmt_fetch() returns true only when a row was fetched.
        $found = (mysqli_stmt_fetch($stmt) === true);
    } else {
        trigger_error(mysqli_stmt_error($stmt));
    }
    mysqli_stmt_close($stmt);

    return $found ? $email : null;
}

/**
 * Return the first child of $node that is an element.
 *
 * Whitespace between tags shows up as text nodes, so firstChild alone is
 * not guaranteed to be the first cell of a row.
 *
 * @param DOMNode $node Parent node to scan.
 *
 * @return DOMNode|null The first element child, or null if there is none.
 */
function firstElementChild($node)
{
    for ($child = $node->firstChild; $child !== null; $child = $child->nextSibling) {
        if ($child->nodeType === XML_ELEMENT_NODE) {
            return $child;
        }
    }
    return null;
}

// The input path may be given on the command line; the original hard-coded
// location remains the default.
$html_file = isset($argv[1])
    ? $argv[1]
    : '/home/whb/doc/gridlab/fcst2010/papers_1_52+52.html';

// Collect parser warnings internally so they are not printed into the
// generated HTML.
libxml_use_internal_errors(true);

$dom = new DOMDocument();
// Set before loading: changing it after the document is parsed has no effect.
$dom->preserveWhiteSpace = false;
if ($dom->loadHTMLFile($html_file) === false) {
    die("Cannot load HTML file: $html_file");
}

$tr_list = $dom->getElementsByTagName('tr');
for ($i = 0; $i < $tr_list->length; $i++) {
    $tr = $tr_list->item($i);
    $td = firstElementChild($tr);
    if ($td === null) {
        // Logged rather than echoed so the message does not end up inside
        // the HTML written to standard output.
        error_log("Line: $i has no columns");
        continue;
    }

    // The first cell holds the paper id; surrounding whitespace would make
    // is_numeric() reject an otherwise valid id.
    $paper_id = trim($td->nodeValue);

    // Rows without a usable id (e.g. the header row) get a blank cell.
    $text = ' ';
    if (is_numeric($paper_id) && $paper_id > 0) {
        $email = getEmail($paper_id);
        if ($email !== null && $email !== '') {
            $text = $email;
        }
    }

    // A text node guarantees that characters such as "&" or "<" in the
    // email are escaped on output.
    $td_node = $dom->createElement('td');
    $td_node->appendChild($dom->createTextNode($text));

    // Append the new cell as the last column of the row.
    try {
        $tr->appendChild($td_node);
    } catch (Exception $e) {
        error_log($e->getMessage());
    }
}

echo $dom->saveHTML();

mysqli_close($link);
?>
再分享一下我老师大神的人工智能教程吧。零基础!通俗易懂!风趣幽默!还带黄段子!希望你也加入到我们人工智能的队伍中来!https://blog.csdn.net/jiangjunshow