1 perl作为一种使用报表提取语言具有很强的处理文本文件的能力,下面介绍如何用perl打开excel文件
a 把excel另存为 .csv 格式,然后打开,excel原来中每行的内容不变,列用逗号隔开。如下所示
Group,Group title,Session chair,Date,Order,Time,Paper ID,Authors,Paper title,
b
在 2000 年,Takanori Kawai 和 John McNamara 编写出了 Spreadsheet::WriteExcel 和 Spreadsheet::ParseExcel 模块并将它们张贴在 CPAN 上,这两个模块使得在任何平台上从 Excel 文件抽取数据成为可能。
perl写excel这个模块的内容
#!/usr/bin/perl
use strict;
use warnings;
use Encode;
use Spreadsheet::WriteExcel;
# 设置一个对象出来,并写上需要存成什么名字的xls
my $xls = Spreadsheet::WriteExcel->new( "fukaiss.xls" );
# xml的内容名字
my $xlsContent = $xls->add_worksheet( 'report' );
# 这是对格式的设置,我们可以设置一个标题的,一个内容的,
# 我现在只设置一个内容
my $contentStyle = $xls->add_format();
$contentStyle->set_size( 8 );
$contentStyle->set_bold(); #设置字体为粗体
$contentStyle->set_align( 'center' );#设置单元格居中
$contentStyle->set_text_wrap(); #是否回车换行
$contentStyle->set_color('red'); #设置单元格前景色为红色
# 写表内容(格式是使用上面添加的表格式)
# 这个中的A,B,C是设置的excel中上面行的字母
# 这个地方中的文字我用了decode这样中文才能正常显示
# 最后面的contentStyle是我上面设置的行风格
$xlsContent->write( "A1", decode( 'utf8', "名字" ), $contentStyle );
$xlsContent->write( "B1", decode( 'utf8', "时间" ), $contentStyle );
$xlsContent->write( "C2", decode( 'utf8', "语言" ), $contentStyle );
#$xlsContent->write($row, $col, 'Hi Excel!', $format); #行,列,内容,格式
#这是关闭,上面的内容设置成循环就能生成很多行了
$xls->close();
其它一些有用的设置
# #设置列的宽度
set_column($first_col, $last_col, $width, $format, $hidden, $level, $collapsed)
下面perl是读excel文件内容
我用的Spreadsheet::Read 模块可以读取xls,csv和sxc等格式的文件,这是那本Perl Hacks上非常推荐的读这些的模块.
#!/usr/bin/perl
use Spreadsheet::Read;
use Data::Dumper;
use Smart::Comments;
my $file = '2808861.xls';
my $spreadsheet = ReadData( $file) or die "Cannot read file ";#指定读的文件名
my $sheet_count = $spreadsheet->[0]{sheets} or die "No sheets in $file/n"; #这个是查有几个sheet
for my $sheet_index (1 .. $sheet_count){
my $sheet = $spreadsheet->[$sheet_index] or next;
printf("%s - %2d: [%-s] %3d Cols, %5d Rows/n",
$file,$sheet_index,$sheet->{label},$sheet->{maxcol},$sheet->{maxrow});#label是sheet名
for my $row (1 .. $sheet->{maxrow}) {
print join "/t" => map {
my $data = $sheet->{cell}[$_][$row] ;
defined $data ? $data : "-";
}1 .. $sheet->{maxcol};
print "/n";
};
}
# 传一个输出的文件和一个数组和数组就能输入 execl 啦,记的第一行做标题
sub excelWrite(
my ( $filename, $file ) = @_;
my $xls = Spreadsheet::WriteExcel::Big->new( "$filename" );
$xls->compatibility_mode();
my $xlsContent= $xls->add_worksheet( "$filename" );
# 标题风格
my $titleStyle = $xls->add_format(
size => 10,
bold => 1,
border => 2,
align => 'center',
);
# 内容风格
my $rowStyle = $xls->add_format(
align => 'left',
border => 1,
);
# 设置行的宽,先查出全部有数据的一行,来做为宽度的基准
my @tmp = ();
foreach my $line ( 2 .. $#{$file} ){
foreach my $contentNu ( 0 .. $#{$file->[$line]} ){
next if $tmp[$contentNu];
$tmp[$contentNu] = $file->[$line][$contentNu];
}
}
foreach ( 0 .. $#tmp ){
$xlsContent->set_column( $_ , $_+1, 4 + length $tmp[$_]);
}
# 列
my $row = 0;
foreach my $line ( @{$file} ){
# 对风格的处理,第一行不一样
my $Style = $rowStyle;
if ($row == 0 ){
$Style = $titleStyle;
}
# 行处理
my $col = 0;
foreach my $content ( @{$line} ){
chomp $content;
$xlsContent->set_row($row, 23);
$xlsContent->write( $row, $col, decode("gbk",$content) ,$Style );
$col ++;
}
$row ++;
}
$xls->close();
}
*********将现有的excel文件导入到新的excel文件中实例*********
import.pl
#!/usr/bin/perl -w
use strict;
use Spreadsheet::ParseExcel;
use Spreadsheet::WriteExcel;
use Data::Dumper;
# cobbled together from examples for the Spreadsheet::ParseExcel and
# Spreadsheet::WriteExcel modules
my $sourcename = shift @ARGV;
my $destname = shift @ARGV or die "invocation: $0 <source file> <destination file>";
my $source_excel = new Spreadsheet::ParseExcel;
my $source_book = $source_excel->Parse($sourcename)
or die "Could not open source Excel file $sourcename: $!";
my $storage_book;
foreach my $source_sheet_number (0 .. $source_book->{SheetCount}-1)
{
my $source_sheet = $source_book->{Worksheet}[$source_sheet_number];
print "--------- SHEET:", $source_sheet->{Name}, "/n";
# sanity checking on the source file: rows and columns should be sensible
next unless defined $source_sheet->{MaxRow};
next unless $source_sheet->{MinRow} <= $source_sheet->{MaxRow};
next unless defined $source_sheet->{MaxCol};
next unless $source_sheet->{MinCol} <= $source_sheet->{MaxCol};
foreach my $row_index ($source_sheet->{MinRow} .. $source_sheet->{MaxRow})
{
foreach my $col_index ($source_sheet->{MinCol} .. $source_sheet->{MaxCol})
{
my $source_cell = $source_sheet->{Cells}[$row_index][$col_index];
if ($source_cell)
{
print "( $row_index , $col_index ) =>", $source_cell->Value, "/n";
if ($source_cell->{Type} eq 'Numeric')
{
$storage_book->{$source_sheet->{Name}}->{$row_index}->{$col_index} = $source_cell->Value*2;
}
else
{
$storage_book->{$source_sheet->{Name}}->{$row_index}->{$col_index} = $source_cell->Value;
} # end of if/else
} # end of source_cell check
} # foreach col_index
} # foreach row_index
} # foreach source_sheet_number
print "Perl recognized the following data (sheet/row/column order):/n";
print Dumper $storage_book;
my $dest_book = Spreadsheet::WriteExcel->new("$destname")
or die "Could not create a new Excel file in $destname: $!";
print "/n/nSaving recognized data in $destname...";
foreach my $sheet (keys %$storage_book)
{
my $dest_sheet = $dest_book->addworksheet($sheet);
foreach my $row (keys %{$storage_book->{$sheet}})
{
foreach my $col (keys %{$storage_book->{$sheet}->{$row}})
{
$dest_sheet->write($row, $col, $storage_book->{$sheet}->{$row}->{$col});
} # foreach column
} # foreach row
} # foreach sheet
$dest_book->close();
print "done!/n";