blastCoverage.R

# 读取CSV格式的outfmt6文件
blast_results <- read.csv("blast_results.csv", header = FALSE)

# 为列命名
colnames(blast_results) <- c("query", "subject", "identity", "length", "mismatch", 
                             "gapopen", "qstart", "qend", "sstart", "send", "evalue", "bitscore")

# 读取subject长度信息
subject_lengths <- read.table("subject_lengths.txt", header = TRUE)

# 计算每个subject的query覆盖率
library(dplyr)

coverage <- blast_results %>%
  mutate(query_length = qend - qstart + 1) %>%
  group_by(subject) %>%
  summarise(total_query_coverage = sum(query_length)) %>%
  left_join(subject_lengths, by = "subject") %>%
  mutate(coverage_ratio = (total_query_coverage / length) * 100,  # 转换为百分比
         coverage_ratio = round(coverage_ratio, 2)) %>%  # 保留两位小数
  select(subject, total_query_coverage, length, coverage_ratio)

# 将结果写入文件
write.table(coverage, file = "coverage.txt", sep = "\t", row.names = FALSE, quote = FALSE)

# 输出完成提示
cat("覆盖率已写入 coverage.txt\n")

blast_results.csv
NC_044701,NC_044768,96.76,1603,43,7,54477,56074,375799,374201,0,2663
NC_044701,NC_044768,83.69,423,37,19,68192,68587,44003,44420,2.00E-101,370
NC_044701,NC_044768,85.28,326,38,5,66335,66654,360145,360466,1.00E-88,327
NC_044701,NC_044768,74.07,891,172,45,102938,103801,332888,333746,1.00E-83,311
NC_044701,NC_044768,90.54,148,11,3,36180,36324,64144,64291,4.00E-48,193
NC_044701,NC_044768,95.56,90,3,1,49,138,406657,406745,4.00E-33,143
NC_044701,NC_044768,96.51,86,2,1,110947,111032,420751,420667,2.00E-32,141
NC_044701,NC_04476823,96.43,84,3,0,31738,31821,384266,384349,5.00E-32,139
NC_044701,NC_04476843,94.94,79,4,0,53913,53991,2926,3004,2.00E-27,124
NC_044701,NC_04476843,96.77,62,2,0,105594,105655,110375,110314,2.00E-21,104
NC_044701,NC_04476822,87.34,79,6,3,88210,88284,240028,240106,2.00E-16,87.9
NC_044701,NC_04476822,100,31,0,0,10365,10395,63403,63373,0.0000002,58.4
NC_044701,NC_044768,96.97,33,0,1,66299,66330,360093,360125,0.000002,54.7
NC_044703,NC_044768,87.34,79,6,3,88210,88284,240028,240106,2.00E-16,87.9
NC_044703,NC_044768,100,31,0,0,10365,10395,63403,63373,0.0000002,58.4
NC_044703,NC_04476855,96.97,33,0,1,66299,66330,360093,360125,0.000002,54.7
coverrage.txt
subject	total_query_coverage	length	coverage_ratio
NC_044768	3637	33637	10.81
NC_04476822	106	4106	2.58
NC_04476823	84	55584	0.15
NC_04476843	141	555141	0.03
NC_04476855	32	5532	0.58
subject_lengths.txt
subject    length
NC_044768	33637
NC_04476822	4106
NC_04476823	55584
NC_04476843	555141
NC_04476855	5532
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

终是蝶衣梦晓楼

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值