使用底层 Matrix 包是最快的方式。
1. 模拟数据
# Take the first two rows and the four numeric columns of iris
# (a matrix can only hold numbers).
df1 = iris[seq_len(2), seq_len(4)]
# Plant a few zeros so there is something for the sparse format to omit.
df1[1, 1] = 0
df1[2, c(2, 4)] = 0
# Label the rows as genes and the columns as cells in one step.
dimnames(df1) = list(
  paste0("gene", seq_len(nrow(df1))),
  paste0("cell", seq_len(ncol(df1)))
)
df1
输出:
> df1
cell1 cell2 cell3 cell4
gene1 0.0 3.5 1.4 0.2
gene2 4.9 0.0 1.4 0.0
2.写入文件: 3列格式
########### 写
# 1. Keep the column and row labels so they can be written out next to the
#    matrix file (the matrix itself is stored without names).
col_name = names(df1)
row_name = row.names(df1)
# 2. Turn the data frame into a plain matrix.
mat = as.matrix(df1)
> mat
cell1 cell2 cell3 cell4
gene1 0.0 3.5 1.4 0.2
gene2 4.9 0.0 1.4 0.0
# Drop both the row names and the column names in a single assignment.
dimnames(mat) = NULL
> mat
[,1] [,2] [,3] [,4]
[1,] 0.0 3.5 1.4 0.2
[2,] 4.9 0.0 1.4 0.0
library(Matrix)
# Convert the dense numeric matrix to compressed-column sparse storage.
# The coercion goes through virtual classes only, because the direct
# as(mat, "dgCMatrix") form is deprecated since Matrix 1.5-0; for a numeric
# matrix this chain produces a "dgCMatrix".
mat.dgC=as(as(as(mat, "dMatrix"), "generalMatrix"), "CsparseMatrix")
mat.dgC
> mat.dgC
2 x 4 sparse Matrix of class "dgCMatrix"
[1,] . 3.5 1.4 0.2
[2,] 4.9 . 1.4 .
# Make sure the output directory exists first: writeMM() and writeLines()
# do not create it and fail when it is missing. An already existing
# directory is left alone (the warning is suppressed).
dir.create("dustbin", showWarnings = FALSE)
# Sparse matrix entries in Matrix Market format.
writeMM(mat.dgC, "dustbin/mat.df.mtx")
# Row and column labels, one per line, in the same order as the matrix.
writeLines(row_name, "dustbin/mat.rownames.txt")
writeLines(col_name, "dustbin/mat.colnames.txt")
3.读入文件: 3列格式
########### 读
#' Load a sparse matrix together with its row and column labels.
#'
#' Reads three files from `dir`: "mat.df.mtx" with readMM(), and
#' "mat.rownames.txt" / "mat.colnames.txt" with readLines() (one label per
#' line).
#'
#' @param dir Directory that contains the three files.
#' @return The matrix returned by readMM(), with row and column names set.
loadData = function(dir="./dustbin/"){
  mtx_path = file.path(dir, "mat.df.mtx")
  row_path = file.path(dir, "mat.rownames.txt")
  col_path = file.path(dir, "mat.colnames.txt")

  sparse_mat = readMM(mtx_path)
  rownames(sparse_mat) = readLines(row_path)
  colnames(sparse_mat) = readLines(col_path)
  sparse_mat
}
df_1=loadData("dustbin/")
> df_1
2 x 4 sparse Matrix of class "dgTMatrix"
cell1 cell2 cell3 cell4
gene1 . 3.5 1.4 0.2
gene2 4.9 . 1.4 .
4. 行列转置
思路: 先把矩阵转成3列(i, j, x)格式,交换前2列(行号与列号),再还原成稀疏矩阵。注: Matrix 包已为稀疏矩阵提供 t() 方法,实际使用时直接 t(df_1) 即可,并且会保留行名和列名;下面的做法主要用于演示3列格式。
mat_2=summary(df_1)
> mat_2 #3列形式
2 x 4 sparse Matrix of class "dgTMatrix", with 5 entries
i j x
1 2 1 4.9
2 1 2 3.5
3 1 3 1.4
4 2 3 1.4
5 1 4 0.2
(1) 失败的尝试
mat_3=mat_2[, c(2,1,3)]
> mat_3
j i x
1 1 2 4.9
2 2 1 3.5
3 3 1 1.4
4 3 2 1.4
5 4 1 0.2
as(mat_3, "dgCMatrix") #error 尝试失败
(2) 再次尝试
str(mat_2)
# Classes ‘sparseSummary’ and 'data.frame': 5 obs. of 3 variables:
#  $ i: int 2 1 1 2 1
#  $ j: int 1 2 3 3 4
#  $ x: num 4.9 3.5 1.4 1.4 0.2
#  - attr(*, "header")= chr "2 x 4 sparse Matrix of class \"dgTMatrix\", with 5 entries"
library(Matrix)
#' Transpose a sparse matrix given in triplet ("sparseSummary") form.
#'
#' Swaps the row index `i` and the column index `j` of every stored entry and
#' rebuilds a sparse matrix from the swapped triplets with sparseMatrix().
#' Row and column names are not part of the triplet form, so the result has
#' none.
#'
#' @param mat A `sparseSummary` object (what `summary()` returns for a sparse
#'   Matrix); its columns `i`, `j` and `x` are used.
#' @param dims Optional `c(nrow, ncol)` of the transposed result. When `NULL`
#'   (the default) the size is recovered from the "header" attribute of `mat`
#'   if that is possible, otherwise it is inferred from the largest indices.
#' @return The sparse matrix produced by `sparseMatrix()`.
my_t=function(mat, dims=NULL){
  # inherits() looks at the whole class vector, not only its first element.
  if(!inherits(mat, "sparseSummary")){
    stop("Can only input Class sparseSummary!")
  }

  if(is.null(dims)){
    # The triplets list only the stored entries, so a trailing all-zero row or
    # column would vanish if the size were guessed from max(i) and max(j).
    # The header looks like '2 x 4 sparse Matrix of class ...' and carries the
    # size of the original matrix.
    header=attr(mat, "header")
    if(is.character(header) && length(header) == 1L &&
       grepl("^[0-9]+ x [0-9]+ ", header)){
      size=suppressWarnings(
        as.integer(strsplit(header, " ", fixed=TRUE)[[1L]][c(1L, 3L)])
      )
      # Trust the header only when every index fits inside it; otherwise fall
      # back to inferring the size from the indices.
      fits=!anyNA(size) &&
        (nrow(mat) == 0L ||
           (size[1L] >= max(mat$i) && size[2L] >= max(mat$j)))
      if(fits){
        dims=rev(size)  # transposed: rows and columns swap
      }
    }
  }

  if(is.null(dims)){
    sparseMatrix(i=mat$j, j=mat$i, x=mat$x)
  }else{
    sparseMatrix(i=mat$j, j=mat$i, x=mat$x, dims=dims)
  }
}
mat_2_t=my_t(mat_2); mat_2_t
> mat_2_t=my_t(mat_2); mat_2_t
4 x 2 sparse Matrix of class "dgCMatrix"
[1,] . 4.9
[2,] 3.5 .
[3,] 1.4 1.4
[4,] 0.2 .
# 保存到文件
writeMM(mat_2_t, "dustbin/mat_2_t.txt" )