将GEO的soft 数据转换为expression matrix

 #从soft文件中得到mRNA 和 miRNA 的表达值
#或者在命令行直接运行 getgeo --file=GSE16441_family.soft 

   

#【若soft数据来自多个platform(GPL),则需对每个platform分别运行一次下面的脚本】
  
 use Microarray::GEO::SOFT;
  use Cwd;
  
  # initialize
  my $soft = Microarray::GEO::SOFT->new; 


  $soft = Microarray::GEO::SOFT->new(file => "GSE16441_family.soft");#【soft数据输入】


  # parse
  # it returns a  Microarray::GEO::SOFT::GDS,
  # Microarray::GEO::SOFT::GSE or Microarray::GEO::SOFT::GPL object
  # according the the GSE ID type
  my $data = $soft->parse;


  # sinece GSE can contain more than one GPL【一个GPL测一类数据,测mRNA和miRNA不在一个平台】
  # we can get the GPL list in a GSE
  my $gpl_list = $data->list("GPL");#【多个platform存储在变量my $gpl_list 】
  
  # merge samples belonging to a same GPL into a data set
  my $gds_list = $data->merge;
  
  # if the GSE only have one platform
  # then the merged data set is the first one in gds_list
  # and the platform is the first one in gpl_list
  my $g = $gds_list->[0];#【处理第一个platform数据】
  my $gpl = $gpl_list->[0];
  
  # since GPL data contains different mapping of genes or probes
  # we can transform from probe id to gene symbol
  # it returns a Microarray::ExprSet object
  my $e = $g->id_convert($gpl, 'GENE_SYMBOL'); #第二个参数取决于soft文件中 gene symbol所在列的列名
    
  # then you can do some simple processing thing
  # eliminate the blank lines
  $e->remove_empty_features;
  
  # make all symbols unique
  $e->unique_features;
  
  # obtain the expression matrix
  $e->save('kidney-mRNA-expression');  #【数据输出,设置输出文件名】 

#【同样,稍作修改,处理该soft数据中下一个platform(GPL)的数据】

 use Microarray::GEO::SOFT;
  use Cwd;
  
  # initialize
  my $soft = Microarray::GEO::SOFT->new; 


  $soft = Microarray::GEO::SOFT->new(file => " GSE16441_family.soft");


  # parse
  # it returns a  Microarray::GEO::SOFT::GDS,
  # Microarray::GEO::SOFT::GSE or Microarray::GEO::SOFT::GPL object
  # according the the GSE ID type
  my $data = $soft->parse;


  # sinece GSE can contain more than one GPL
  # we can get the GPL list in a GSE
  my $gpl_list = $data->list("GPL");
  
  # merge samples belonging to a same GPL into a data set
  my $gds_list = $data->merge;
  
  # if the GSE only have one platform
  # then the merged data set is the first one in gds_list
  # and the platform is the first one in gpl_list
  my $g = $gds_list->[ 1];
  my $gpl = $gpl_list->[ 1];
  
  # since GPL data contains different mapping of genes or probes
  # we can transform from probe id to gene symbol
  # it returns a Microarray::ExprSet object
  my $e = $g->id_convert($gpl, ' miRNA_ID'); #第二个参数取决于soft文件中 gene symbol所在列的列名
    
  # then you can do some simple processing thing
  # eliminate the blank lines
  $e->remove_empty_features;
  
  # make all symbols unique
  $e->unique_features;
  
  # obtain the expression matrix
  $e->save(' kidney-miRNA-expression');   
  • 3
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值