#!/usr/bin/perl
# Version 2
# Export/import logs and comments from VankeWeekly to BlogBus.
#
# Blogbus.com does not support importing large files, so version 2 of this
# script splits the data to be imported into several small files and then
# works on those.
#
# Field values are copied verbatim from the source XML (no decoding layer is
# applied on input or output), so whatever escaping and byte encoding they
# carry in the export is preserved in the generated files.
use strict;
use warnings;

# Source export and destination file prefix. The destination directory must
# already exist; output files are named <prefix><N>.xml where N is the
# 1-based number of the first log stored in that file (1, 51, 101, ...).
my $INPUT_FILE    = 'F:/home/bsmagic/MyBlogData20060725.xml';
my $OUTPUT_PREFIX = './toblogbus/toblogbus20060725_';

# Number of logs written to each output file.
my $LOGS_PER_FILE = 50;

# Upper bound on the number of <blog_Content> records read from the export.
my $MAX_RECORDS = 5000;

# Document header written at the top of every output file. The heredoc is
# single-quoted so that "$Revision" is emitted literally instead of being
# interpolated as a Perl variable.
my $HEAD = <<'BLOGHEADSTR';
<?xml version="1.0" encoding="utf-8"?>
<!-- Generated by: http://www.BlogBus.Com/ $Revision: 1.0 $ -->
<BlogBusCom dtype="BlogData" SchemaVersion="1.0-b" Creator="BlogBus.Com BlogSystem V2.0.1-beta">
<Description>
<BlogName>流放之地的狂欢</BlogName>
<ExportTime>2006-07-24 16:17:37</ExportTime>
<DomainName>bsmagic.blogbus.com</DomainName>
</Description>
BLOGHEADSTR

# Closing tag written at the end of every output file.
my $TAIL = '</BlogBusCom>';

# Fixed per-log settings emitted between <Excerpt> and <Content>.
my $LOG_DEFAULTS = <<'COMNSTR';
<TrackBack/>
<Status>1</Status>
<AllowComment>Y</AllowComment>
<AllowPing>Y</AllowPing>
<AllowLinks>Y</AllowLinks>
COMNSTR

# Sort comparator for record IDs: all-digit IDs first in numeric order,
# anything else afterwards in string order.
sub by_id {
    my $a_is_num = ( $a =~ /\A\d+\z/ );
    my $b_is_num = ( $b =~ /\A\d+\z/ );
    return $a <=> $b if $a_is_num && $b_is_num;
    return -1        if $a_is_num;
    return 1         if $b_is_num;
    return $a cmp $b;
}

# Return the argument, or the empty string when it is undefined.
sub or_empty {
    my ($value) = @_;
    return defined $value ? $value : '';
}

# Return the text between <$tag> and </$tag> in $xml (first occurrence,
# shortest match, may span lines), or undef when the tag is absent or empty.
sub tag_text {
    my ( $xml, $tag ) = @_;
    if ( $xml =~ m{<\Q$tag\E>(.+?)</\Q$tag\E>}s ) {
        return $1;
    }
    return undef;    ## no critic (callers always use scalar context)
}

# Turn "2005-11-30T15:52:00" into "2005-11-30 15:52:00" by replacing the
# first "T" with a space. An undefined date yields the empty string.
sub format_date {
    my ($date) = @_;
    $date = or_empty($date);
    $date =~ s/T/ /;
    return $date;
}

# Read a whole file into one string; dies if it cannot be opened.
sub slurp {
    my ($path) = @_;
    open my $in, '<', $path or die "Cannot open '$path' for reading: $!";
    my $content = do { local $/; <$in> };
    close $in or die "Cannot close '$path': $!";
    return or_empty($content);
}

# Parse the export text and return a hash reference keyed by post ID.
#
# Each value is a hash with the keys content, logdate, title, text and
# abstract for the post itself, plus comments => { comment ID => { content,
# logdate, title, text, abstract, ip, author, url } }.
#
# Records with PostType 1 or 2 are stored as posts and records with
# PostType 8 as comments under their ParentID; every other record is
# ignored. A comment whose parent post never appears leaves an entry that
# has only the comments key (no logdate). At most $MAX_RECORDS records are
# examined.
sub parse_export {
    my ($xml) = @_;
    my %entry_for;
    my $seen = 0;

    while ( $xml =~ m{<blog_Content\b[^>]*>(.+?)</blog_Content\b}sg ) {
        last if $seen++ >= $MAX_RECORDS;
        my $record = $1;

        # Skip records without a numeric PostType instead of acting on a
        # stale capture from an earlier match.
        my ($post_type) = $record =~ m{<PostType>(\d+)</PostType>}s;
        next unless defined $post_type;

        my $id = tag_text( $record, 'ID' );
        next unless defined $id;

        if ( $post_type == 1 or $post_type == 2 ) {
            my $post = ( $entry_for{$id} ||= {} );
            $post->{content}  = $record;
            $post->{logdate}  = tag_text( $record, 'DateAdded' );
            $post->{title}    = tag_text( $record, 'Title' );
            $post->{text}     = tag_text( $record, 'Text' );
            $post->{abstract} = $post->{text};
        }
        elsif ( $post_type == 8 ) {
            my $parent_id = tag_text( $record, 'ParentID' );
            next unless defined $parent_id;

            my $comment = ( $entry_for{$parent_id}{comments}{$id} ||= {} );
            $comment->{content} = $record;
            $comment->{logdate} = tag_text( $record, 'DateAdded' );
            $comment->{title}   = tag_text( $record, 'Title' );
            $comment->{text}    = tag_text( $record, 'Text' );

            # The abstract mirrors the comment's own text, as it does for
            # posts; the parent post may not have been read yet.
            $comment->{abstract} = $comment->{text};
            $comment->{ip}       = tag_text( $record, 'SourceName' );
            $comment->{author}   = tag_text( $record, 'Author' );
            $comment->{url}      = tag_text( $record, 'SourceUrl' );
        }
    }

    return \%entry_for;
}

# Render one comment hash as a BlogBus <Comment> element.
sub render_comment {
    my ($comment) = @_;
    return '<Comment>'
      . '<Email />'
      . '<HomePage>' . or_empty( $comment->{url} ) . '</HomePage>' . "\n"
      . '<PostIP>' . or_empty( $comment->{ip} ) . '</PostIP>' . "\n"
      . '<NiceName>' . or_empty( $comment->{author} ) . '</NiceName>' . "\n"
      . '<CommentText>' . or_empty( $comment->{text} ) . '</CommentText>' . "\n"
      . '<CreateTime>' . format_date( $comment->{logdate} ) . '</CreateTime>' . "\n"
      . '</Comment>';
}

# Render one post hash, including its comments in ID order, as a BlogBus
# <Log> element.
sub render_log {
    my ($entry) = @_;
    my $comments = $entry->{comments} || {};

    my $xml = '<Log>' . "\n"
      . '<Title>' . or_empty( $entry->{title} ) . '</Title>' . "\n"
      . '<LogDate>' . format_date( $entry->{logdate} ) . '</LogDate>' . "\n"
      . '<Excerpt>' . or_empty( $entry->{abstract} ) . '</Excerpt>' . "\n"
      . $LOG_DEFAULTS
      . '<Content>' . or_empty( $entry->{text} ) . '</Content>' . "\n"
      . '<Writer>bsmagic</Writer>' . "\n"
      . '<Tags>VankeWeekly Life Motion Philosophy Fun Thought</Tags>' . "\n"
      . '<Comments>';

    for my $comment_id ( sort by_id keys %{$comments} ) {
        $xml .= render_comment( $comments->{$comment_id} );
    }

    $xml .= '</Comments>' . '</Log>' . "\n";
    return $xml;
}

# Write the closing tag to an output file and close it; dies on failure so
# that buffered write errors are not lost.
sub finish_chunk {
    my ( $out, $path ) = @_;
    print {$out} $TAIL or die "Cannot write to '$path': $!";
    close $out or die "Cannot close '$path': $!";
    return;
}

# Write every post in $entry_for (a hash reference as returned by
# parse_export) to files named <prefix><N>.xml, $LOGS_PER_FILE logs per
# file, in ID order. Entries without a logdate are skipped and do not count
# towards the split. Every file that is opened gets both header and closing
# tag. Returns the number of logs written.
sub write_chunks {
    my ( $entry_for, $prefix ) = @_;
    my $written = 0;
    my ( $out, $path );

    for my $id ( sort by_id keys %{$entry_for} ) {
        my $entry = $entry_for->{$id};

        # Decide whether to skip before touching any file, so a skipped
        # entry can never open, truncate or miscount an output file.
        next unless defined $entry->{logdate};

        if ( $written % $LOGS_PER_FILE == 0 ) {
            finish_chunk( $out, $path ) if defined $out;
            $path = $prefix . ( $written + 1 ) . '.xml';
            undef $out;
            open $out, '>', $path
              or die "Cannot open '$path' for writing: $!";
            print {$out} $HEAD or die "Cannot write to '$path': $!";
        }

        print {$out} render_log($entry) or die "Cannot write to '$path': $!";
        $written++;
    }

    finish_chunk( $out, $path ) if defined $out;
    return $written;
}

# Convert the configured export file into the split BlogBus import files.
sub main {
    my $entry_for = parse_export( slurp($INPUT_FILE) );
    write_chunks( $entry_for, $OUTPUT_PREFIX );
    return;
}

# Run the conversion only when executed as a script, not when loaded with
# require/do.
main() unless caller;

1;