#################################################################################################################################
# File: excel.pl
# Desscription: Perl读取Excel文件
# Create: Thinkhy
# Date: 2010.05.04
# History:
# 2010.05.04 确定转换规则
# 2010.05.05 确定技术方案(Excel、XML操作库)
# 2010.05.07 完成编码
# 2010.05.10 根据确认意见,修改:
# 1> 输出XML保持有序(sequence)
#
# Q: 1. 暂只支持"绝对路径"的中文文件名? A: 暂无解决办法
# 2. 怎么确认Excel文件共有多少行? A: 已解决,参考:http://blog.csdn.net/lanman/archive/2007/07/10/1684191.aspx
# 3. 输出XML添加GBK的编码声明? A: 已解决,调用接口:createProcessingInstruction appendChild
#################################################################################################################################
##!/usr/local/bin/perl
use strict;
use Tie::IxHash; # 使用CPAN包 IxHash 使哈希值保持原有顺序
use Win32::OLE qw(in with);
use Win32::OLE::Const 'Microsoft Excel';
use constant MSXMLDOM => "MSXML2.DOMDOCUMENT.4.0";
$Win32::OLE::Warn = 3; #die on errors
# 文件名中须包括绝对路径
my $filename = $ARGV[0]; # 必须用绝对路径
my $output_file = $ARGV[1]; # 可以用相对路径
die "[Usage: excel.pl inputfile outputfile]" if (!$filename);
my $excel = Win32::OLE->GetActiveObject('Excel.Application')
|| Win32::OLE->new('Excel.Application', 'Quit');
my $book0 = $excel->Workbooks->Open($filename);
my $sheet0 = $book0->Worksheets(1);
# 获得Excel文件的行数
# 参考: http://blog.csdn.net/lanman/archive/2007/07/10/1684191.aspx
my $row_num = $sheet0->{UsedRange}->{Rows}->{Count};
my @collection;
my @array;
# 生成
if ($row_num > 0) {
# 生成头结点
my $doc = Win32::OLE->new(MSXMLDOM) or die
"Couldn't create @{[MSXMLDOM]}";
$doc->{async} = "False";
$doc->{validateOnParse} = "True";
# 新建PI结点 默认编码为GBK
my $pi_node = $doc->createProcessingInstruction("xml", "version='1.0' encoding='GBK'");
$doc->appendChild($pi_node); # 记得创建PI结点后要手动append到doc结点
# 新建根结点
my $root = $doc->createElement("root");
$doc->appendChild($root);
$doc->{documentElement} = $root;
my $head = $doc->createElement("head");
$root->appendChild($head);
# 新建头结点
my %head_info;
tie (%head_info, 'Tie::IxHash');
%head_info = ( # 哈希要用小括号包起来?
"class" => "条目",
"identity" => "",
"copyright" => "",
"publishtime" => "2002-04-10", # [TODO] 这里需要取当前时间
"state" => "中国",
"area" => "北京",
);
foreach my $key (keys %head_info) {
my $value = $head_info{$key};
my $node = $doc->createElement($key);
$node->{Text} = $value;
$head->appendChild($node);
}
# 新建main结点
my $main = $doc->createElement("main");
$root->appendChild($main);
my @tmp_array = (1..156);
my $cnt_item = 1;
for (my $num = 1; $num < $row_num; $num++) {
my $item_name = getcell($sheet0, $num, 'A'); # 地名
my $pinyin = getcell($sheet0, $num, 'B'); # 拼音
my $abbr = getcell($sheet0, $num, 'C'); # 简称
my $zoneid = getcell($sheet0, $num, 'D'); # 区号
my $zipcode = getcell($sheet0, $num, 'E'); # 邮编
my $author = getcell($sheet0, $num, 'M'); # 作者
my $writetime = getcell($sheet0, $num, 'K'); # 时间
$writetime =~ s//./-/ig;
my $trace = getcell($sheet0, $num, 'L'); # 来源
# 首列(条目名)或拼音 为空则过滤此行
# print $item_name, $pinyin, $abbr, $zoneid, $zipcode , "/n";
next if (!$item_name || (!$pinyin && !$abbr && !$zoneid && !$zipcode));
# 过滤首列 内容为 “地名” 的行
next if ($item_name =~ //s*地名/s*/);
# 新建 item 结点
my $item = $doc->createElement("item");
$main->appendChild($item);
$item->setAttribute("id",$cnt_item++);
$item->setAttribute("fatherid",0);
my %main_info;
tie (%main_info, 'Tie::IxHash');
%main_info = ( # 哈希要用小括号包起来?
"type" => "",
"name" => $item_name,
"ename" => "",
"spell" => $pinyin,
"volume" => "",
"subclass" => "",
"author" => $author,
"writetime" => $writetime,
"referitem" => $item_name,
"trace" => $trace,
"links" => "",
"elementfiles" => "",
);
# 构建item下一级内容
foreach my $key (keys %main_info) {
my $value = $main_info{$key};
my $node = $doc->createElement($key);
$node->{Text} = $value;
$item->appendChild($node);
if ($key eq "elementfiles") {
my $sourcetype = $doc->createElement("sourcetype");
my $count = $doc->createElement("count");
$node->appendChild($sourcetype);
$node->appendChild($count);
}
}
# 省级、市级元素按HOTLINK处理
my $province = getcell($sheet0, $num, 'I'); # 省级
my $city = getcell($sheet0, $num, 'J'); # 市级
my $links = $item->selectSingleNode("links");
$links || die "未创建links结点";
my ($hotlink1, $hotlink2);
# 省级元素不为空则创建HOTLINK
if ($province) {
my $hotlink_pro = $doc->createElement("HOTLINK");
$hotlink_pro->{Text} = $province;
$hotlink1 = $hotlink_pro->cloneNode(1);
$links->appendChild($hotlink_pro);
}
if ($city) {
my $hotlink_city = $doc->createElement("HOTLINK");
$hotlink_city->{Text} = $city;
$hotlink2 = $hotlink_city->cloneNode(1);
$links->appendChild($hotlink_city);
}
my $body = $doc->createElement("body");
$item->appendChild($body);
my $body_cdata = $doc->createElement("body_cdata");
my $province = getcell($sheet0, $num, 'I');&nbs p; # 省级
my $city = getcell($sheet0, $num, 'J'); # 市级
my $population = getcell($sheet0, $num, 'F'); # 人口
my $area = getcell($sheet0, $num, 'G'); # 面积
my $summary = getcell($sheet0, $num, 'H'); # 概况
my %cdata_info;
tie (%cdata_info, 'Tie::IxHash');
%cdata_info = (
"简称" => "$abbr",
"区号" => "$zoneid",
"邮编" => "$zipcode",
"人口(万人)" => "$population",
"面积(平方千米)" => "$area",
"概况" => "$summary",
"省级" => "$hotlink1->{Text}",
"市级" => "$hotlink2->{Text}",
);
# 构建CDATA内容
foreach my $key (keys %cdata_info) {
my $value = $cdata_info{$key};
next if (!$value); # 值为空则忽略
# 每个项目一行(<p>)
my $p_node = $doc->createElement("p");
$body_cdata->appendChild($p_node);
# 字段名称加粗(<bold>)
my $bold_node = $doc->createElement("bold");
$bold_node->{Text} = $key.":";
$p_node->appendChild($bold_node);
if ($key eq "省级" || $key eq "市级") { # 省级、市级直接添加HOTLINK结点
my $hotlink_node = $doc->createElement("HOTLINK");
$hotlink_node->{Text} = $value;
$p_node->appendChild($hotlink_node);
}
else {
my $text_node = $doc->createTextNode($value);
$p_node->appendChild($text_node);
}
}
my $cdata_text = $body_cdata->{xml};
$cdata_text =~ s/(<body_cdata>)|(<//body_cdata>)//ig; # 清除body_cdata结点
# 新建CDATA结点
my $cdata_node = $doc->createCDATASection($cdata_text);
# 将CDATA结点添加到 /root/main/item/body
$body->appendChild($cdata_node);
} # for
# 保存输出XML
my($dirpath,$basename,$extname) = ($filename =~ /^((?:.*[:])?)(.*)(/.[^.]*$)/s); # 获取文件名 代码来源于basename.pm
$output_file = $output_file ? $output_file : $basename.".xml";
$doc->save($output_file);
print "Excel文件".'"'.$filename.'"'."转换成功!/n输出文件:$output_file/n";
}
$book0->Close;
# 获得Excel表格中的一个元素
sub getcell {
my($sheet, $x, $y, $cellvalue);
($sheet, $x, $y) = @_;
$cellvalue = $sheet->cells($x, $y)->{'Value'};
chomp($cellvalue); # 注意chomp的返回值为数字
$cellvalue;
}
__END__
[经验总结]Perl读取Excel数据并调用MSXML接口操作XML数据
最新推荐文章于 2019-07-28 15:36:37 发布