Linux 命令行(也可以使用 R 里的 Linux 命令行):计算每个 fq.gz 文件的平均 GC 含量
# Compute the mean GC value of every FASTQ file under seq2/ and write one
# "<name><TAB><mean GC>" line per file to output2/<name>.txt, where <name> is
# the file name with the .fq.gz suffix removed.
mkdir -p output2  # the redirect inside the loop fails if the directory is missing
for file in seq2/*.fq.gz; do
    # When nothing matches, the glob pattern is passed through literally;
    # skip it instead of handing a non-existent path to seqkit.
    [ -e "$file" ] || continue
    filename=$(basename "$file" .fq.gz)
    # seqkit prints one tab-separated row per record, with the GC value as the
    # last column. Split on tabs and read the last field: read headers often
    # contain spaces, so whitespace splitting with $2 can pick up a piece of
    # the header instead of the GC value.
    # An input with no records yields an empty average rather than a
    # division-by-zero error from awk.
    avg_gc=$(seqkit fx2tab "$file" -g --name |
        awk -F'\t' '{sum += $NF; count++} END {if (count > 0) print sum / count}')
    # printf keeps backslashes in file names literal, unlike `echo -e`.
    printf '%s\t%s\n' "$filename" "$avg_gc" > "output2/$filename.txt"
done
合并:将所有单个文件的结果合并为一个文件
# Concatenate every per-file result in output2/ into a single table,
# overwriting combined_GC.txt if it already exists.
cat output2/*.txt > combined_GC.txt
求每个样品的平均值(R 语言)
# Average the per-file GC values for each sample prefix.
#
# Input:  combined_GC.txt - tab-separated, no header. Column 1 (V1) holds the
#         file name without its .fq.gz suffix, column 2 (V2) the mean GC value
#         of that file.
# Output: combined_GC_output.txt - tab-separated with a header row, one row
#         per prefix group, columns `prefix` and `V2` (the mean of V2).

# Load the required library.
library(dplyr)

# Read the combined table. Quote and comment handling are switched off so that
# names containing `#`, `'` or `"` are read verbatim instead of truncating or
# merging lines.
data <- read.table(
  "combined_GC.txt",
  header = FALSE,
  sep = "\t",
  quote = "",
  comment.char = "",
  stringsAsFactors = FALSE
)

# Grouping key: everything before the first underscore of the file name.
data$prefix <- sub("_.*", "", data$V1)

# Mean of V2 within each prefix. The column is named explicitly rather than
# selected by position with the superseded summarise_at().
result <- data %>%
  group_by(prefix) %>%
  summarise(V2 = mean(V2)) %>%
  ungroup()

# Shorten the label to the part before the first hyphen.
# NOTE(review): this runs after the aggregation, so two prefixes that differ
# only after the hyphen end up as separate rows with the same label - confirm
# whether they should be averaged together instead.
result$prefix <- sub("-.*", "", result$prefix)

# Write the result to a new file.
write.table(
  result,
  file = "combined_GC_output.txt",
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)