#一个目录下的所有DOC文件写入多个TXT文件
#程序:刘兴
#时间:2010.3.19
#blog:http://deepfuture.iteye.com
#QQ:782322192
use 5.010;
use warnings;
use strict;
use Win32::OLE;
use Win32::OLE::Const 'Microsoft Word';
# Convert every Word document in $mydir to plain text: each NAME.doc is
# written out, paragraph by paragraph, to NAME.txt in the same directory.
my $mydir='D:/深未来/文本挖掘/txt';
my @list = glob("$mydir/*.doc");

# Start a hidden Word instance through OLE automation. The 'Quit' destructor
# asks Win32::OLE to invoke Word's Quit method when $word is destroyed, so
# Word is shut down even if the script dies part-way through.
my $word = Win32::OLE->new('Word.Application', 'Quit')
    or die "Cannot start Word: " . Win32::OLE->LastError() . "\n";
$word->{'Visible'} = 0;

# Unbuffer STDOUT so the progress dots show up while paragraphs are processed.
$| = 1;

DOCUMENT:
for my $doc_path (@list) {
    # Derive the output name by replacing only the trailing ".doc" extension.
    # An unanchored, unescaped /.doc/ would rewrite the first "<any char>doc"
    # found anywhere in the path (e.g. "my document.doc").
    my $txt_path = $doc_path;
    unless ($txt_path =~ s/\.doc\z/.txt/i) {
        # Never risk opening the source document itself for writing.
        warn "Skipping $doc_path: name does not end in .doc\n";
        next DOCUMENT;
    }

    # Open the document before creating the output file, so a document that
    # Word cannot open does not leave an empty .txt behind.
    my $document = $word->Documents->Open($doc_path);
    unless ($document) {
        warn "Cannot open $doc_path in Word: " . Win32::OLE->LastError() . "\n";
        next DOCUMENT;
    }

    my $out;
    unless (open $out, '>', $txt_path) {
        warn "Cannot write $txt_path: $!\n";
        $document->Close(0);    # 0 = wdDoNotSaveChanges
        next DOCUMENT;
    }

    # Number of paragraphs in the document; treat a failed query as empty.
    my $paragraph_count = $document->Paragraphs->Count // 0;

    say '';
    say "正在处理:$doc_path=========>$txt_path";

    # Word's Paragraphs collection is 1-based.
    for my $index (1 .. $paragraph_count) {
        print ".";
        my $paragraph = $document->Paragraphs($index) or next;
        my $range     = $paragraph->Range             or next;
        my $text      = $range->Text;

        # One output line per non-empty paragraph of the current document.
        print {$out} "$text\n" if $text;
    }

    # Buffered write errors only surface at close, so check it.
    close $out or warn "Error closing $txt_path: $!\n";

    # Close just this document, discarding any changes, instead of closing
    # every document that happens to be open in the Word instance.
    $document->Close(0);        # 0 = wdDoNotSaveChanges
}

# Drop the last reference to the Word object; this runs the 'Quit' destructor
# registered above and terminates the hidden Word process.
undef $word;