# Per-subject query coverage from a table of BLAST hits.
#
# Inputs (read from the working directory):
#   blast_results.csv   - headerless comma-separated hit table; its 12 columns
#                         are named below.
#   subject_lengths.txt - whitespace-separated table with a header row
#                         providing the columns `subject` and `length`.
# Output:
#   coverage.txt        - tab-separated table with one row per subject:
#                         subject, total_query_coverage, length, and
#                         coverage_ratio (percent, rounded to 2 decimals).
library(dplyr)

# stringsAsFactors = FALSE keeps the identifiers as character on R < 4.0 too,
# so the join below never has to reconcile mismatched factor levels.
blast_results <- read.csv(
  "blast_results.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)
colnames(blast_results) <- c(
  "query", "subject", "identity", "length", "mismatch", "gapopen",
  "qstart", "qend", "sstart", "send", "evalue", "bitscore"
)

subject_lengths <- read.table(
  "subject_lengths.txt",
  header = TRUE,
  stringsAsFactors = FALSE
)

# NOTE(review): every hit contributes its full span, so hits that overlap on
# the query (or that come from different queries) are counted more than once
# and the ratio can exceed 100 - confirm this is the intended definition.
coverage <- blast_results %>%
  # abs() keeps the span positive even if a hit is listed with qstart > qend.
  mutate(hit_span = abs(qend - qstart) + 1) %>%
  group_by(subject) %>%
  summarise(total_query_coverage = sum(hit_span)) %>%
  # The hit table's own `length` column is dropped by summarise(), so the
  # `length` used from here on is the one supplied by subject_lengths.
  left_join(subject_lengths, by = "subject") %>%
  mutate(coverage_ratio = round((total_query_coverage / length) * 100, 2)) %>%
  select(subject, total_query_coverage, length, coverage_ratio)

# Subjects without a usable length end up with an NA ratio after the left
# join; report them instead of letting the NA pass unnoticed.
missing_length <- coverage$subject[is.na(coverage$length)]
if (length(missing_length) > 0) {
  warning(
    "Missing length in subject_lengths.txt for subject(s): ",
    paste(missing_length, collapse = ", "),
    call. = FALSE
  )
}

write.table(
  coverage,
  file = "coverage.txt",
  sep = "\t",
  row.names = FALSE,
  quote = FALSE
)
cat("覆盖率已写入 coverage.txt\n")
blast_results.csv
NC_044701,NC_044768,96.76,1603,43,7,54477,56074,375799,374201,0,2663
NC_044701,NC_044768,83.69,423,37,19,68192,68587,44003,44420,2.00E-101,370
NC_044701,NC_044768,85.28,326,38,5,66335,66654,360145,360466,1.00E-88,327
NC_044701,NC_044768,74.07,891,172,45,102938,103801,332888,333746,1.00E-83,311
NC_044701,NC_044768,90.54,148,11,3,36180,36324,64144,64291,4.00E-48,193
NC_044701,NC_044768,95.56,90,3,1,49,138,406657,406745,4.00E-33,143
NC_044701,NC_044768,96.51,86,2,1,110947,111032,420751,420667,2.00E-32,141
NC_044701,NC_04476823,96.43,84,3,0,31738,31821,384266,384349,5.00E-32,139
NC_044701,NC_04476843,94.94,79,4,0,53913,53991,2926,3004,2.00E-27,124
NC_044701,NC_04476843,96.77,62,2,0,105594,105655,110375,110314,2.00E-21,104
NC_044701,NC_04476822,87.34,79,6,3,88210,88284,240028,240106,2.00E-16,87.9
NC_044701,NC_04476822,100,31,0,0,10365,10395,63403,63373,0.0000002,58.4
NC_044701,NC_044768,96.97,33,0,1,66299,66330,360093,360125,0.000002,54.7
NC_044703,NC_044768,87.34,79,6,3,88210,88284,240028,240106,2.00E-16,87.9
NC_044703,NC_044768,100,31,0,0,10365,10395,63403,63373,0.0000002,58.4
NC_044703,NC_04476855,96.97,33,0,1,66299,66330,360093,360125,0.000002,54.7
coverage.txt
subject total_query_coverage length coverage_ratio
NC_044768 3637 33637 10.81
NC_04476822 106 4106 2.58
NC_04476823 84 55584 0.15
NC_04476843 141 555141 0.03
NC_04476855 32 5532 0.58
subject_lengths.txt
subject length
NC_044768 33637
NC_04476822 4106
NC_04476823 55584
NC_04476843 555141
NC_04476855 5532