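# Alternatively, run it directly from the command line: getgeo --file=GSE16441_family.soft
# [If the SOFT data was obtained on several platforms, process each platform separately.]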
use Microarray::GEO::SOFT;
use Cwd;
# Export the expression matrix of the FIRST platform (GPL) found in the GSE
# SOFT file, with probe ids converted to gene symbols.
#
# The work happens inside a bare block so that its lexical variables stay
# private to this section and cannot collide with the section that follows.
{
    use strict;
    use warnings;

    # Input SOFT file.
    my $soft_file = "GSE16441_family.soft";
    # Position of the platform to process; 0 is the first platform.
    my $platform_index = 0;
    # Must match the name of the gene-symbol column in the SOFT platform table.
    my $id_column = 'GENE_SYMBOL';
    # Name of the file the expression matrix is written to.
    my $output_file = 'kidney-mRNA-expression';

    # Build the reader once, directly on the local file. (A separate
    # argument-less new() whose result is immediately overwritten is not
    # needed.)
    my $soft = Microarray::GEO::SOFT->new(file => $soft_file);

    # Parse the file. Depending on the kind of record, this returns a
    # Microarray::GEO::SOFT::GDS, ::GSE or ::GPL object.
    my $data = $soft->parse;

    # A GSE can contain more than one GPL (one GPL measures one kind of data;
    # mRNA and miRNA are not measured on the same platform), so fetch the
    # list of platforms in this series.
    my $gpl_list = $data->list("GPL");

    # Merge the samples that belong to the same GPL into one data set each.
    my $gds_list = $data->merge;

    # Pick the merged data set and the platform at the same position in
    # their respective lists.
    my $g   = $gds_list->[$platform_index];
    my $gpl = $gpl_list->[$platform_index];

    # Fail with a clear message instead of a later "Can't call method on an
    # undefined value" when the series has no platform at this position.
    unless (defined $g and defined $gpl) {
        die "No platform at index $platform_index in '$soft_file'";
    }

    # GPL data carries the mapping between probes and genes; convert the
    # probe ids to gene symbols. This returns a Microarray::ExprSet object.
    my $e = $g->id_convert($gpl, $id_column);

    # Simple clean-up: drop rows with an empty feature name ...
    $e->remove_empty_features;
    # ... and make every symbol unique.
    $e->unique_features;

    # Write the expression matrix to the output file.
    $e->save($output_file);
}
# [Likewise, with small changes, process the next platform (GPL) of the same SOFT file.]
use Microarray::GEO::SOFT;use Cwd;
# Export the expression matrix of the SECOND platform (GPL) found in the GSE
# SOFT file, with probe ids converted to miRNA ids.
#
# The work happens inside a bare block so that its lexical variables are
# private to this section and do not redeclare same-named variables used
# earlier in the script.
{
    use strict;
    use warnings;

    # Input SOFT file. NOTE: the name must not carry a leading space,
    # otherwise a different (non-existent) path is looked up.
    my $soft_file = "GSE16441_family.soft";
    # Position of the platform to process; 1 is the second platform.
    my $platform_index = 1;
    # Must match the name of the id column in the SOFT platform table
    # exactly, i.e. without any leading blank.
    my $id_column = 'miRNA_ID';
    # Name of the file the expression matrix is written to (no leading
    # space, so the file is created under the intended name).
    my $output_file = 'kidney-miRNA-expression';

    # Build the reader once, directly on the local file.
    my $soft = Microarray::GEO::SOFT->new(file => $soft_file);

    # Parse the file. Depending on the kind of record, this returns a
    # Microarray::GEO::SOFT::GDS, ::GSE or ::GPL object.
    my $data = $soft->parse;

    # A GSE can contain more than one GPL, so fetch the list of platforms
    # in this series.
    my $gpl_list = $data->list("GPL");

    # Merge the samples that belong to the same GPL into one data set each.
    my $gds_list = $data->merge;

    # Pick the merged data set and the platform at the same position in
    # their respective lists.
    my $g   = $gds_list->[$platform_index];
    my $gpl = $gpl_list->[$platform_index];

    # A series measured on a single platform has no entry here; stop with a
    # clear message instead of failing on an undefined value further down.
    unless (defined $g and defined $gpl) {
        die "No platform at index $platform_index in '$soft_file'";
    }

    # GPL data carries the mapping between probes and their annotations;
    # convert the probe ids to the ids in the chosen column. This returns a
    # Microarray::ExprSet object.
    my $e = $g->id_convert($gpl, $id_column);

    # Simple clean-up: drop rows with an empty feature name ...
    $e->remove_empty_features;
    # ... and make every id unique.
    $e->unique_features;

    # Write the expression matrix to the output file.
    $e->save($output_file);
}