ReferenceBases 是 GATK(Genome Analysis Toolkit)库中的一个类,用于表示和操作参考基因组的碱基序列数据。它通常在处理与基因组相关的数据时使用,比如在变异调用和其他基因组分析中。
ReferenceBases
类概述
ReferenceBases 是一个用于封装参考基因组中某个特定区间的碱基序列(例如 DNA 序列)的类。它提供了一种简单的方式来访问和操作这些序列数据。
主要功能
- 存储参考序列数据:封装从参考基因组中提取的碱基序列及其对应的区间。
- 区域访问:允许用户获取特定区域的参考序列。
- 支持多种操作:支持对序列数据的常见操作,例如通过 getSubset 截取子区间的碱基,以及通过 equals、hashCode、toString 进行比较和输出。
ReferenceBases
类源码:
package org.broadinstitute.hellbender.utils.reference;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import java.io.Serializable;
import java.util.Arrays;
/**
 * ReferenceBases stores the bases of the reference genome for a particular interval.
 *
 * <p>This class requires the bases to be encoded at 8 bits per base: the backing array must hold
 * exactly one byte per position of the interval.
 *
 * <p>The byte array is not copied on construction, and {@link #getBases()} returns the same array
 * instance, so modifications made to that array by callers are visible through this object.
 */
public final class ReferenceBases implements Serializable {
    private static final long serialVersionUID = 1L;

    private final byte[] bases;
    private final SimpleInterval interval;

    /**
     * Creates a ReferenceBases covering {@code interval}.
     *
     * @param bases the bases for the interval, one byte per position; must pass {@code Utils.nonNull}.
     *              The array is stored as-is (no defensive copy).
     * @param interval the interval the bases belong to; must pass {@code Utils.nonNull}
     * @throws IllegalArgumentException if {@code interval.size()} differs from {@code bases.length}
     */
    public ReferenceBases( final byte[] bases, final SimpleInterval interval ) {
        Utils.nonNull(bases);
        Utils.nonNull(interval);
        if (interval.size() != bases.length) {
            throw new IllegalArgumentException(
                    "interval must have same length as bases, " + interval + " " + interval.size() + "," + bases.length);
        }
        this.bases = bases;
        this.interval = interval;
    }

    /**
     * @return a string containing the numeric value of every base (via {@link Arrays#toString(byte[])})
     *         followed by the interval
     */
    @Override
    public String toString() {
        return "ReferenceBases{" +
                "bases=" + Arrays.toString(bases) +
                ", interval=" + interval +
                '}';
    }

    /**
     * Two ReferenceBases are equal when they are of the same class, their base arrays have equal
     * contents and their intervals are equal.
     */
    @Override
    public boolean equals(final Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        final ReferenceBases that = (ReferenceBases) o;
        if (!Arrays.equals(getBases(), that.getBases())) {
            return false;
        }
        return getInterval().equals(that.getInterval());
    }

    /**
     * Hash code derived from the contents of the base array and from the interval, matching the
     * fields compared in {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        int result = Arrays.hashCode(getBases());
        result = 31 * result + getInterval().hashCode();
        return result;
    }

    /**
     * @return the backing array of bases itself, not a copy
     */
    public byte[] getBases() {
        return bases;
    }

    /**
     * @return the interval these bases cover
     */
    public SimpleInterval getInterval() {
        return interval;
    }

    /**
     * getSubset returns only the bases of the interval passed in.
     *
     * <p>The returned object holds its own copy of the selected bases.
     *
     * @param subsetInterval the subset to be returned; must pass {@code Utils.nonNull} and be
     *                       contained in {@link #getInterval()}
     * @return the subset of ReferenceBases
     * @throws GATKException if {@code subsetInterval} is not contained in this object's interval
     */
    public ReferenceBases getSubset(final SimpleInterval subsetInterval) {
        // Validate up front, as the constructor does. Without this a null argument is only
        // rejected incidentally further down (at the latest by subsetInterval.toString()
        // while the error message is being assembled).
        Utils.nonNull(subsetInterval);
        if (!this.interval.contains(subsetInterval)) {
            throw new GATKException("Reference doesn't match input interval (asked for "+subsetInterval.toString()+" but we have "+this.interval+")");
        }
        // Translate genomic coordinates into offsets within this.bases.
        final int start = subsetInterval.getStart() - this.interval.getStart();
        final int end = subsetInterval.getEnd() - this.interval.getStart();
        // copyOfRange's upper bound is exclusive while the interval end is used inclusively, hence + 1.
        return new ReferenceBases(Arrays.copyOfRange(this.bases, start, end + 1), subsetInterval);
    }
}