htsjdk库Genotype及相关类介绍

在 HTSJDK 库中,处理基因型的主要类包括 Genotype、FastGenotype、GenotypeBuilder 以及相关的类和接口。以下是这些类和接口的详细介绍:

Genotype 类

主要功能
  • 表示基因型:Genotype 类用于表示个体在特定变异位置上的基因型。基因型是对个体在变异位置上的等位基因组合的描述。
  • 包含样本信息:它包括与样本相关的基因型信息,例如基因型的等位基因、深度、质量等。
源码:
/*
* Copyright (c) 2012 The Broad Institute
* 
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
* 
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* 
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package htsjdk.variant.variantcontext;


import htsjdk.tribble.util.ParsingUtils;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFUtils;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;

/**
 * This class encompasses all the basic information about a genotype.
 *
 * <p>Instances are intended to be immutable: the sample name and the filter string are final,
 * and the only non-final field is the genotype type, which is derived lazily from the alleles
 * on the first call to {@link #getType()} and then cached.</p>
 *
 * @author Mark DePristo
 */
public abstract class Genotype implements Comparable<Genotype>, Serializable {
    public static final long serialVersionUID = 1L;

    /**
     * A list of genotype field keys corresponding to values we
     * manage inline in the Genotype object.  They must not appear in the
     * extended attributes map
     */
    public final static Collection<String> PRIMARY_KEYS = Arrays.asList(
            VCFConstants.GENOTYPE_FILTER_KEY,
            VCFConstants.GENOTYPE_KEY,
            VCFConstants.GENOTYPE_QUALITY_KEY,
            VCFConstants.DEPTH_KEY,
            VCFConstants.GENOTYPE_ALLELE_DEPTHS,
            VCFConstants.GENOTYPE_PL_KEY);

    /** Separator placed between alleles when the genotype is phased. */
    public final static String PHASED_ALLELE_SEPARATOR = "|";
    /** Separator placed between alleles when the genotype is not phased. */
    public final static String UNPHASED_ALLELE_SEPARATOR = "/";

    private final String sampleName;
    // Lazily computed by getType(); null until the first request.
    private GenotypeType type = null;
    // null means "no filters applied"; an empty string passed to the constructor is normalised to null.
    private final String filters;

    /**
     * @param sampleName the name of the sample this genotype belongs to
     * @param filters    the filter string; {@code null} or an empty string are both stored as {@code null}
     */
    protected Genotype(final String sampleName, final String filters) {
        this.sampleName = sampleName;
        this.filters = filters == null || filters.isEmpty() ? null : filters;
    }

    /**
     * @return the alleles for this genotype.  Cannot be null.  May be empty
     */
    public abstract List<Allele> getAlleles();

    /**
     * @return true if any allele is REF
     */
    public boolean hasRefAllele() {
        return getAlleles().stream().anyMatch(Allele::isReference);
    }

    /**
     * @return true if any allele is ALT, (NO_CALL are ignored)
     */
    public boolean hasAltAllele() {
        return getAlleles().stream().anyMatch(a -> !(a.isReference() || a.isNoCall()));
    }

    /**
     * Counts how many times an allele appears in this genotype.
     *
     * @param allele the allele to look for; compared with {@code equals}
     * @return a value &gt;= 0 indicating how many times the allele occurred in this sample's genotype
     */
    public int countAllele(final Allele allele) {
        int count = 0;
        for ( final Allele a : getAlleles() ) {
            if ( a.equals(allele) ) {
                count++;
            }
        }
        return count;
    }

    /**
     * Get the ith allele in this genotype
     *
     * @param i the ith allele, must be &lt; the ploidy, starting with 0
     * @return the allele at position i, which cannot be null
     */
    public abstract Allele getAllele(int i);

    /**
     * Are the alleles phased w.r.t. the global phasing system?
     *
     * @return true if yes
     */
    public abstract boolean isPhased();

    /**
     * What is the ploidy of this sample?
     *
     * @return the number of alleles returned by {@link #getAlleles()}; 0 if that list is empty
     */
    public int getPloidy() {
        return getAlleles().size();
    }

    /**
     * @return the sequencing depth of this sample, or -1 if this value is missing
     */
    public abstract int getDP();

    /**
     * @return the count of reads, one for each allele in the surrounding Variant context,
     *      matching the corresponding allele, or null if this value is missing.  MUST
     *      NOT BE MODIFIED!
     */
    public abstract int[] getAD();

    /**
     * Returns the name associated with this sample.
     *
     * @return the sample name supplied at construction
     */
    public String getSampleName() {
        return sampleName;
    }

    /**
     * @return a phred-scaled quality score, or -1 if none is available
     */
    public abstract int getGQ();

    /**
     * Does the PL field have a value?
     * @return true if there's a PL field value
     */
    public boolean hasPL() {
        return getPL() != null;
    }

    /**
     * Does the AD field have a value?
     * @return true if there's a AD field value
     */
    public boolean hasAD() {
        return getAD() != null;
    }

    /**
     * Does the GQ field have a value?
     * @return true if there's a GQ field value
     */
    public boolean hasGQ() {
        return getGQ() != -1;
    }

    /**
     * Does the DP field have a value?
     * @return true if there's a DP field value
     */
    public boolean hasDP() {
        return getDP() != -1;
    }

    // ---------------------------------------------------------------------------------------------------------
    //
    // The type of this genotype
    //
    // ---------------------------------------------------------------------------------------------------------

    /**
     * @return the high-level type of this sample's genotype; computed on first use and cached
     */
    public GenotypeType getType() {
        if ( type == null ) {
            type = determineType();
        }
        return type;
    }

    /**
     * Internal code to determine the type of the genotype from the alleles vector.
     *
     * <ul>
     *   <li>no alleles at all: {@code UNAVAILABLE}</li>
     *   <li>only no-call alleles: {@code NO_CALL}</li>
     *   <li>a mix of no-call and called alleles: {@code MIXED}</li>
     *   <li>called alleles that are not all equal: {@code HET}</li>
     *   <li>all called alleles equal: {@code HOM_REF} if that allele is reference, else {@code HOM_VAR}</li>
     * </ul>
     *
     * @return the type
     */
    protected GenotypeType determineType() {
        // TODO -- this code is slow and could be optimized for the diploid case
        final List<Allele> alleles = getAlleles();
        if ( alleles.isEmpty() ) {
            return GenotypeType.UNAVAILABLE;
        }

        boolean sawNoCall = false;
        boolean sawMultipleAlleles = false;
        Allele firstCallAllele = null;

        for ( final Allele allele : alleles ) {
            if ( allele.isNoCall() ) {
                sawNoCall = true;
            } else if ( firstCallAllele == null ) {
                firstCallAllele = allele;
            } else if ( !allele.equals(firstCallAllele) ) {
                sawMultipleAlleles = true;
            }
        }

        if ( sawNoCall ) {
            return firstCallAllele == null ? GenotypeType.NO_CALL : GenotypeType.MIXED;
        }

        // Defensive check: with a non-empty list and no no-calls a called allele must have been seen.
        if ( firstCallAllele == null ) {
            throw new IllegalStateException("BUG: there are no alleles present in this genotype but the alleles list is not null");
        }

        if ( sawMultipleAlleles ) {
            return GenotypeType.HET;
        }
        return firstCallAllele.isReference() ? GenotypeType.HOM_REF : GenotypeType.HOM_VAR;
    }

    /**
     * @return true if all observed alleles are the same (regardless of whether they are ref or alt); if any alleles are no-calls, this method will return false.
     */
    public boolean isHom()    { return isHomRef() || isHomVar(); }

    /**
     * @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false.
     */
    public boolean isHomRef() { return getType() == GenotypeType.HOM_REF; }

    /**
     * @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false.
     */
    public boolean isHomVar() { return getType() == GenotypeType.HOM_VAR; }

    /**
     * @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false.
     */
    public boolean isHet() { return getType() == GenotypeType.HET; }

    /**
     * @return true if we're het (observed alleles differ) and neither of the first two alleles is reference; if the ploidy is less than 2 or if any alleles are no-calls, this method will return false.
     */
    public boolean isHetNonRef() {
        // NOTE(review): only alleles 0 and 1 are inspected, so for ploidy > 2 a reference allele
        // at a later position is not detected -- confirm whether that is intended.
        return getType() == GenotypeType.HET && getAllele(0).isNonReference() && getAllele(1).isNonReference();
    }

    /**
     * @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false.
     */
    public boolean isNoCall() { return getType() == GenotypeType.NO_CALL; }

    /**
     * @return true if this genotype is comprised of any alleles that are not no-calls (even if some are).
     */
    public boolean isCalled() {
        final GenotypeType current = getType();
        return current != GenotypeType.NO_CALL && current != GenotypeType.UNAVAILABLE;
    }

    /**
     * @return true if this genotype is comprised of both calls and no-calls.
     */
    public boolean isMixed() { return getType() == GenotypeType.MIXED; }

    /**
     * @return true if the type of this genotype is set, i.e. it is not {@code UNAVAILABLE}.
     */
    public boolean isAvailable() { return getType() != GenotypeType.UNAVAILABLE; }

    // ------------------------------------------------------------------------------
    //
    // methods for getting genotype likelihoods for a genotype object, if present
    //
    // ------------------------------------------------------------------------------

    /**
     * @return Returns true if this Genotype has PL field values
     */
    public boolean hasLikelihoods() {
        return getPL() != null;
    }

    /**
     * Convenience function that returns a string representation of the PL field of this
     * genotype, or . if none is available.
     *
     * @return a non-null String representation for the PL of this sample
     */
    public String getLikelihoodsString() {
        return hasLikelihoods() ? getLikelihoods().toString() : VCFConstants.MISSING_VALUE_v4;
    }

    /**
     * Returns the GenotypesLikelihoods data associated with this Genotype, or null if missing
     * @return null or a GenotypesLikelihood object for this sample's PL field
     */
    public GenotypeLikelihoods getLikelihoods() {
        return hasLikelihoods() ? GenotypeLikelihoods.fromPLs(getPL()) : null;
    }

    /**
     * Are all likelihoods for this sample non-informative?
     *
     * Returns true if all PLs are 0 =&gt; 0,0,0 =&gt; true
     * 0,0,0,0,0,0 =&gt; true
     * 0,10,100 =&gt; false
     *
     * A missing PL field (null) is also reported as non-informative.
     *
     * @return true if the PL field is missing or every PL value is 0
     */
    public boolean isNonInformative() {
        final int[] pls = getPL();
        if ( pls == null ) {
            return true;
        }
        for ( final int pl : pls ) {
            if ( pl != 0 ) {
                return false;
            }
        }
        return true;
    }

    /**
     * Unsafe low-level accessor the PL field itself, may be null.
     *
     * @return a pointer to the underlying PL data.  MUST NOT BE MODIFIED!
     */
    public abstract int[] getPL();

    // ---------------------------------------------------------------------------------------------------------
    //
    // Many different string representations
    //
    // ---------------------------------------------------------------------------------------------------------

    /**
     * Return a VCF-like string representation for the alleles of this genotype.
     *
     * Does not append the reference * marker on the alleles.
     *
     * @return a string representing the genotype, or the literal "NA" if the ploidy is 0
     */
    public String getGenotypeString() {
        return getGenotypeString(true);
    }

    /**
     * Return a VCF-like string representation for the alleles of this genotype.
     *
     * If ignoreRefState is true, will not append the reference * marker on the alleles and the
     * alleles are emitted in their stored order.  Otherwise the alleles themselves are joined,
     * sorted first when the genotype is not phased.
     *
     * @param ignoreRefState whether to use only the allele base strings
     * @return a string representing the genotype, or the literal "NA" if the ploidy is 0
     */
    public String getGenotypeString(boolean ignoreRefState) {
        if ( getPloidy() == 0 ) {
            return "NA";
        }

        // Use the appropriate separator depending on whether the genotype is phased.
        final boolean phased = isPhased();
        final String separator = phased ? PHASED_ALLELE_SEPARATOR : UNPHASED_ALLELE_SEPARATOR;

        // With ignoreRefState we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele).
        if ( ignoreRefState ) {
            return ParsingUtils.join(separator, getAlleleStrings());
        }

        // So that everything is deterministic with regards to integration tests, sort the Alleles
        // (only when the genotype isn't phased, since phased order is meaningful).
        final List<Allele> alleles = phased ? getAlleles() : ParsingUtils.sortList(getAlleles());
        return ParsingUtils.join(separator, alleles);
    }

    /**
     * Utility that returns a list of allele strings corresponding to the alleles in this sample
     *
     * @return a new list holding {@code getBaseString()} of every allele, in allele order
     */
    protected List<String> getAlleleStrings() {
        final List<String> al = new ArrayList<>(getPloidy());
        for ( final Allele a : getAlleles() ) {
            al.add(a.getBaseString());
        }
        return al;
    }

    /**
     * @return a bracketed display string containing the sample name, the genotype string and every
     *         inline field / extended attribute that is present
     */
    @Override
    public String toString() {
        return String.format("[%s %s%s%s%s%s%s%s]",
                getSampleName(),
                getGenotypeString(false),
                toStringIfExists(VCFConstants.GENOTYPE_QUALITY_KEY, getGQ()),
                toStringIfExists(VCFConstants.DEPTH_KEY, getDP()),
                toStringIfExists(VCFConstants.GENOTYPE_ALLELE_DEPTHS, getAD()),
                toStringIfExists(VCFConstants.GENOTYPE_PL_KEY, getPL()),
                toStringIfExists(VCFConstants.GENOTYPE_FILTER_KEY, getFilters()),
                sortedString(getExtendedAttributes()));
    }

    /**
     * @return the genotype string followed by {@code :Q} and the GQ value
     */
    public String toBriefString() {
        return String.format("%s:Q%d", getGenotypeString(false), getGQ());
    }

    // ---------------------------------------------------------------------------------------------------------
    //
    // Comparison operations
    //
    // ---------------------------------------------------------------------------------------------------------

    /**
     * comparable genotypes -&gt; compareTo on the sample names
     *
     * @param genotype the genotype to compare against
     * @return the result of comparing the two sample names
     */
    @Override
    public int compareTo(final Genotype genotype) {
        return getSampleName().compareTo(genotype.getSampleName());
    }

    /**
     * Same as {@link #sameGenotype(Genotype, boolean)} with phase ignored.
     *
     * @param other the genotype to compare against
     * @return true if both genotypes carry the same alleles, ignoring order
     */
    public boolean sameGenotype(final Genotype other) {
        return sameGenotype(other, true);
    }

    /**
     * Compares the alleles of two genotypes.
     *
     * @param other       the genotype to compare against
     * @param ignorePhase if true the allele order is ignored; if false alleles are compared element-by-element
     * @return false if the ploidies differ, otherwise whether the allele collections are equal
     */
    public boolean sameGenotype(final Genotype other, boolean ignorePhase) {
        if ( getPloidy() != other.getPloidy() ) {
            return false; // gotta have the same number of allele to be equal
        }

        // By default, compare the elements in the lists of alleles, element-by-element
        Collection<Allele> thisAlleles = this.getAlleles();
        Collection<Allele> otherAlleles = other.getAlleles();

        if ( ignorePhase ) { // do not care about order, only identity of Alleles
            // NOTE(review): a TreeSet also collapses duplicates, so for ploidy > 2 genotypes with the
            // same distinct alleles but different copy numbers compare as equal -- confirm intended.
            thisAlleles = new TreeSet<Allele>(thisAlleles);   //implemented Allele.compareTo()
            otherAlleles = new TreeSet<Allele>(otherAlleles);
        }

        return thisAlleles.equals(otherAlleles);
    }

    // ---------------------------------------------------------------------------------------------------------
    //
    // get routines for extended attributes
    //
    // ---------------------------------------------------------------------------------------------------------

    /**
     * Returns the extended attributes for this object
     * @return is never null, but is often isEmpty()
     */
    public abstract Map<String, Object> getExtendedAttributes();

    /**
     * Is key associated with a value (even a null one) in the extended attributes?
     *
     * Note this will not return true for the inline attributes DP, GQ, AD, or PL
     *
     * @param key a non-null string key to check for an association
     * @return true if key has a value in the extendedAttributes
     */
    public boolean hasExtendedAttribute(final String key) {
        return getExtendedAttributes().containsKey(key);
    }

    /**
     * Get the extended attribute value associated with key, if possible
     *
     * @param key a non-null string key to fetch a value for
     * @param defaultValue the value to return if key isn't in the extended attributes
     * @return a value (potentially) null associated with key, or defaultValue if no association exists
     */
    public Object getExtendedAttribute(final String key, final Object defaultValue) {
        return hasExtendedAttribute(key) ? getExtendedAttributes().get(key) : defaultValue;
    }

    /**
     * Same as {@link #getExtendedAttribute(String, Object)} with a null default
     *
     * @param key a non-null string key to fetch a value for
     * @return the associated value, or null if there is none
     */
    public Object getExtendedAttribute(final String key) {
        return getExtendedAttribute(key, null);
    }

    /**
     * Returns the filter string associated with this Genotype.
     *
     * @return If this result == null, then the genotype is considered PASSing filters
     *   If the result != null, then the genotype has failed filtering for the reason(s)
     *   specified in result.  To be reference compliant multiple filter field
     *   string values can be encoded with a ; separator.
     */
    public final String getFilters() {
        return filters;
    }

    /**
     * Is this genotype filtered or not?
     *
     * @return returns false if getFilters() == null
     */
    public final boolean isFiltered() {
        return getFilters() != null;
    }

    /** @deprecated use {@link #hasGQ()} */
    @Deprecated public boolean hasLog10PError() { return hasGQ(); }

    /**
     * @deprecated use {@link #getGQ()}; this simply returns {@code getGQ() / -10.0} and does not
     *             special-case a missing GQ of -1
     */
    @Deprecated public double getLog10PError() { return getGQ() / -10.0; }

    /** @deprecated use {@link #getGQ()} */
    @Deprecated public int getPhredScaledQual() { return getGQ(); }

    /**
     * @param key          the extended attribute key
     * @param defaultValue returned when the attribute is absent or null
     * @return the attribute as a String; non-String values are converted with {@code String.valueOf}
     * @deprecated
     */
    @Deprecated
    public String getAttributeAsString(String key, String defaultValue) {
        final Object x = getExtendedAttribute(key);
        if ( x == null ) return defaultValue;
        if ( x instanceof String ) return (String) x;
        return String.valueOf(x);
    }

    /**
     * @param key          the extended attribute key
     * @param defaultValue returned when the attribute is absent, null, or the VCF missing value
     * @return the attribute as an int; String values are parsed with {@code Integer.parseInt}
     * @throws ClassCastException    if the value is neither an Integer nor a String
     * @throws NumberFormatException if the value is a String that is not a valid int
     * @deprecated
     */
    @Deprecated
    public int getAttributeAsInt(String key, int defaultValue) {
        final Object x = getExtendedAttribute(key);
        // Compare by value: a "." that is not the very same String instance as the constant must
        // still be recognised as missing instead of being handed to Integer.parseInt.
        if ( x == null || VCFConstants.MISSING_VALUE_v4.equals(x) ) return defaultValue;
        if ( x instanceof Integer ) return (Integer) x;
        return Integer.parseInt((String) x);
    }

    /**
     * @param key          the extended attribute key
     * @param defaultValue returned when the attribute is absent or null
     * @return the attribute as a double; String values are parsed with {@code VCFUtils.parseVcfDouble}
     * @throws ClassCastException if the value is neither a Double nor a String
     * @deprecated
     */
    @Deprecated
    public double getAttributeAsDouble(String key, double defaultValue) {
        final Object x = getExtendedAttribute(key);
        if ( x == null ) return defaultValue;
        if ( x instanceof Double ) return (Double) x;
        return VCFUtils.parseVcfDouble((String) x);
    }

    /**
     * A totally generic getter, that allows you to get specific keys that correspond
     * to even inline values (GQ, for example).  Can be very expensive.  Additionally,
     * all <code>int[]</code> are converted inline into <code>List&lt;Integer&gt;</code> for convenience.
     *
     * @param key the key to look up; inline keys are served from the inline fields, everything
     *            else from the extended attributes
     * @return the value for the key; for AD and PL an empty list is returned when the field is missing
     */
    public Object getAnyAttribute(final String key) {
        if (key.equals(VCFConstants.GENOTYPE_KEY)) {
            return getAlleles();
        } else if (key.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
            return getGQ();
        } else if (key.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
            return hasAD() ? boxedCopy(getAD()) : Collections.emptyList();
        } else if (key.equals(VCFConstants.GENOTYPE_PL_KEY)) {
            return hasPL() ? boxedCopy(getPL()) : Collections.emptyList();
        } else if (key.equals(VCFConstants.DEPTH_KEY)) {
            return getDP();
        } else if (key.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
            return getFilters();
        } else {
            return getExtendedAttribute(key);
        }
    }

    /**
     * Tells whether {@link #getAnyAttribute(String)} has something to report for a key.
     *
     * @param key the key to check; inline keys are answered from the inline fields
     * @return true if the key has a value; the filter key always reports true
     */
    public boolean hasAnyAttribute(final String key) {
        if (key.equals(VCFConstants.GENOTYPE_KEY)) {
            return isAvailable();
        } else if (key.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
            return hasGQ();
        } else if (key.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
            return hasAD();
        } else if (key.equals(VCFConstants.GENOTYPE_PL_KEY)) {
            return hasPL();
        } else if (key.equals(VCFConstants.DEPTH_KEY)) {
            return hasDP();
        } else if (key.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
            return true;  //always available
        } else {
            return hasExtendedAttribute(key);
        }
    }

    // TODO -- add getAttributesAsX interface here

    // ------------------------------------------------------------------------------
    //
    // private utilities
    //
    // ------------------------------------------------------------------------------

    /**
     * Copies an int array into a freshly allocated list of boxed Integers.
     *
     * @param values the non-null array to copy
     * @return a new mutable list with the same values in the same order
     */
    private static List<Integer> boxedCopy(final int[] values) {
        final List<Integer> boxed = new ArrayList<>(values.length);
        for ( final int v : values ) {
            boxed.add(v);
        }
        return boxed;
    }

    /**
     * a utility method for generating sorted strings from a map key set.
     * @param c the map
     * @param <T> the key type
     * @param <V> the value type
     * @return a string, enclosed in {} and preceded by a space, with comma separated key=value
     *         pairs in order of the keys; the empty string if the map is empty
     */
    protected static <T extends Comparable<T>, V> String sortedString(Map<T, V> c) {

        // NOTE -- THIS IS COPIED FROM GATK UTILS TO ALLOW US TO KEEP A SEPARATION BETWEEN THE GATK AND VCF CODECS
        final List<T> t = new ArrayList<T>(c.keySet());
        Collections.sort(t);

        final List<String> pairs = new ArrayList<String>();
        for (final T k : t) {
            pairs.add(k + "=" + c.get(k));
        }

        return pairs.isEmpty() ? "" : " {" + ParsingUtils.join(", ", pairs.toArray(new String[pairs.size()])) + "}";
    }

    /**
     * Returns a display name for field name with value v if this isn't -1.  Otherwise returns ""
     * @param name of the field ("AD")
     * @param v the value of the field, or -1 if missing
     * @return a non-null string for display if the field is not missing
     */
    protected final static String toStringIfExists(final String name, final int v) {
        return v == -1 ? "" : " " + name + " " + v;
    }

    /**
     * Returns a display name for field name with String value v if this isn't null.  Otherwise returns ""
     * @param name of the field ("FT")
     * @param v the value of the field, or null if missing
     * @return a non-null string for display if the field is not missing
     */
    protected final static String toStringIfExists(final String name, final String v) {
        return v == null ? "" : " " + name + " " + v;
    }

    /**
     * Returns a display name for field name with values vs if this isn't null.  Otherwise returns ""
     * @param name of the field ("AD")
     * @param vs the value of the field, or null if missing
     * @return a non-null string for display if the field is not missing; values are comma separated
     */
    protected final static String toStringIfExists(final String name, final int[] vs) {
        if ( vs == null ) {
            return "";
        }
        final StringBuilder b = new StringBuilder();
        b.append(' ').append(name).append(' ');
        for ( int i = 0; i < vs.length; i++ ) {
            if ( i != 0 ) b.append(',');
            b.append(vs[i]);
        }
        return b.toString();
    }

    /**
     * Does the attribute map have a mapping involving a forbidden key (i.e.,
     * one that's managed inline by this Genotypes object?
     *
     * @param attributes the extended attributes map to inspect
     * @return true if any key of {@link #PRIMARY_KEYS} is present in the map
     */
    protected final static boolean hasForbiddenKey(final Map<String, Object> attributes) {
        for ( final String forbidden : PRIMARY_KEYS ) {
            if ( attributes.containsKey(forbidden) ) {
                return true;
            }
        }
        return false;
    }

    /**
     * @param key the key to check
     * @return true if the key is one of {@link #PRIMARY_KEYS}
     */
    protected final static boolean isForbiddenKey(final String key) {
        return PRIMARY_KEYS.contains(key);
    }
}

FastGenotype 类

FastGenotype 类继承自Genotype 类。

主要功能
  • 高效存储:通过优化数据结构和访问方式,FastGenotype 类在存储和处理基因型数据时具有更高的效率。
  • 简化表示:提供一种简化的基因型表示方式,减少了内存消耗和计算开销。
  • 兼容性:FastGenotype 是 Genotype 的子类,因此可以在任何需要 Genotype 的地方直接使用,无需额外转换。
源码:
/*
* Copyright (c) 2012 The Broad Institute
* 
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
* 
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* 
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package htsjdk.variant.variantcontext;

import java.util.List;
import java.util.Map;

/**
 * Concrete {@link Genotype} that simply holds references to the values it is given.
 *
 * For the sake of performance, it does not make a copy of the Collections/arrays it's constructed from, and so
 * subsequent changes to those Collections/arrays will be reflected in the FastGenotype object
 *
 * A genotype has several key fields
 *
 * <ul>
 * <li> a sample name, must be a non-null string</li>
 * <li> an ordered list of alleles, interpreted as the genotype of the sample,
 *    each allele for each chromosome given in order.  If alleles = [a*, t]
 *    then the sample is a/t, with a (the reference from the *) the first
 *    chromosome and t on the second chromosome</li>
 * <li> an <code>isPhased</code> marker indicating whether the alleles are phased with respect to some global
 *    coordinate system.  See VCF4.1 spec for a detailed discussion</li>
 * <li> Inline, optimized <code>int</code>s and <code>int[]</code> values for:
 * <ul>
 *      <li> GQ: the phred-scaled genotype quality, or <code>-1</code> if it's missing</li>
 *      <li> DP: the count of reads at this locus for this sample, or <code>-1</code> if missing</li>
 *      <li> AD: an array of counts of reads at this locus, one for each Allele at the site,
 *             that is, for each allele in the surrounding <code>VariantContext</code>.  <code>null</code> if missing.</li>
 *      <li> PL: phred-scaled genotype likelihoods in standard VCF4.1 order for
 *             all combinations of the alleles in the surrounding <code>VariantContext</code>, given
 *             the ploidy of the sample (from the alleles vector).  <code>null</code> if missing.</li>
 * </ul>
 * </li>
 *
 * <li> A general map from String keys to -&gt; Object values for all other attributes in
 *    this genotype.  Note that this map should not contain duplicate values for the
 *    standard bindings for GQ, DP, AD, and PL.  Genotype filters can be put into
 *    this genotype, but it isn't respected by the GATK in analyses</li>
 *</ul>
 *
 * <p>The only way to build a <code>Genotype</code> object is with a <code>GenotypeBuilder</code>, which permits values
 * to be set in any order, which means that <code>GenotypeBuilder</code> may at some point in the chain of
 * sets pass through invalid states that are not permitted in a fully formed immutable
 * <code>Genotype</code>.</p>
 *
 * <p>Note this is a simplified, refactored Genotype object based on the original
 * generic (and slow) implementation from the original VariantContext + Genotype
 * codebase.</p>
 *
 * @author Mark DePristo
 * @since 05/12
 */
public final class FastGenotype extends Genotype {
    private final List<Allele> alleles;
    private final boolean isPhased;
    private final int GQ;
    private final int DP;
    private final int[] AD;
    private final int[] PL;
    private final Map<String, Object> extendedAttributes;

    /**
     * The only way to make one of these, for use by GenotypeBuilder only.
     * Every argument is stored by reference; nothing is copied or validated here.
     *
     * @param sampleName         name of the sample, forwarded to the superclass
     * @param alleles            ordered alleles of the genotype
     * @param isPhased           whether the alleles are phased
     * @param GQ                 genotype quality, -1 if missing
     * @param DP                 read depth, -1 if missing
     * @param AD                 per-allele read counts, null if missing
     * @param PL                 phred-scaled likelihoods, null if missing
     * @param filters            filter string, forwarded to the superclass
     * @param extendedAttributes all remaining attributes
     */
    protected FastGenotype(final String sampleName,
                           final List<Allele> alleles,
                           final boolean isPhased,
                           final int GQ,
                           final int DP,
                           final int[] AD,
                           final int[] PL,
                           final String filters,
                           final Map<String, Object> extendedAttributes) {
        super(sampleName, filters);
        this.extendedAttributes = extendedAttributes;
        this.PL = PL;
        this.AD = AD;
        this.DP = DP;
        this.GQ = GQ;
        this.isPhased = isPhased;
        this.alleles = alleles;
    }

    // ---------------------------------------------------------------------------------------------------------
    //
    // Implementing the abstract methods
    //
    // ---------------------------------------------------------------------------------------------------------

    /** @return the allele list handed to the constructor (not a copy) */
    @Override
    public List<Allele> getAlleles() {
        return alleles;
    }

    /** @return the allele stored at index {@code i} of the allele list */
    @Override
    public Allele getAllele(int i) {
        return alleles.get(i);
    }

    /** @return the phasing flag handed to the constructor */
    @Override
    public boolean isPhased() {
        return isPhased;
    }

    /** @return the stored DP value */
    @Override
    public int getDP() {
        return DP;
    }

    /** @return the stored AD array itself (not a copy), possibly null */
    @Override
    public int[] getAD() {
        return AD;
    }

    /** @return the stored GQ value */
    @Override
    public int getGQ() {
        return GQ;
    }

    /** @return the stored PL array itself (not a copy), possibly null */
    @Override
    public int[] getPL() {
        return PL;
    }

    // ---------------------------------------------------------------------------------------------------------
    //
    // get routines for extended attributes
    //
    // ---------------------------------------------------------------------------------------------------------

    /** @return the extended attribute map handed to the constructor (not a copy) */
    @Override
    public Map<String, Object> getExtendedAttributes() {
        return extendedAttributes;
    }

    /**
     * Is values a valid AD or PL field?  A null array counts as valid; otherwise every
     * entry must be non-negative.  Not referenced anywhere within this class.
     *
     * @param values the array to check, may be null
     * @return false if any entry is negative, true otherwise
     */
    private static boolean validADorPLField(final int[] values) {
        if ( values == null ) {
            return true;
        }
        for ( int idx = 0; idx < values.length; idx++ ) {
            if ( values[idx] < 0 ) {
                return false;
            }
        }
        return true;
    }
}

GenotypeBuilder 类

GenotypeBuilder 是 HTSJDK 库中的一个类,用于构建 Genotype 对象。Genotype 表示个体在特定变异位置上的基因型数据,而 GenotypeBuilder 提供了一个灵活的方式来创建和配置这些基因型对象。

GenotypeBuilder 是 Genotype 类的构建器,用于方便地创建和配置基因型对象。它允许设置基因型的各个属性,如等位基因、深度、质量等,并最终生成一个 Genotype 实例。

主要功能
  • 设置基因型数据:可以设置基因型的样本名称、等位基因、深度、质量等属性。
  • 创建基因型对象:使用 GenotypeBuilder 创建一个配置好的 Genotype 实例。
源码:
/*
* Copyright (c) 2012 The Broad Institute
* 
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
* 
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* 
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package htsjdk.variant.variantcontext;

import htsjdk.tribble.util.ParsingUtils;
import htsjdk.variant.vcf.VCFConstants;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * A builder class for genotypes
 *
 * Provides convenience setter methods for all of the Genotype field
 * values.  Setter methods can be used in any order, allowing you to
 * pass through states that wouldn't be allowed in the highly regulated
 * immutable Genotype class.
 *
 * All fields default to meaningful MISSING values.
 *
 * Call make() to actually create the corresponding Genotype object from
 * this builder.  Can be called multiple times to create independent copies,
 * or with intervening sets to conveniently make similar Genotypes with
 * slight modifications.
 *
 * Re-using the same GenotypeBuilder to build multiple Genotype objects via calls
 * to make() is dangerous, since reference types in the builder (eg., Collections/arrays)
 * don't get copied when making each Genotype. To safely re-use the same builder object
 * multiple times, use makeWithShallowCopy() instead of make().
 *
 * @author Mark DePristo
 * @since 06/12
 */
public final class GenotypeBuilder {
    // These lists are shared by every Genotype returned from createMissing(), so they are
    // wrapped as unmodifiable: Arrays.asList alone would still allow set() on the shared instance.
    private static final List<Allele> HAPLOID_NO_CALL =
            Collections.unmodifiableList(Arrays.asList(Allele.NO_CALL));
    private static final List<Allele> DIPLOID_NO_CALL =
            Collections.unmodifiableList(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));

    private String sampleName = null;
    private List<Allele> alleles = Collections.emptyList();

    private boolean isPhased = false;
    private int GQ = -1;
    private int DP = -1;
    private int[] AD = null;
    private int[] PL = null;
    private Map<String, Object> extendedAttributes = null;
    private String filters = null;
    private int initialAttributeMapSize = 5;

    // Shared, read-only stand-in used by make()/makeWithShallowCopy() when no attribute was set.
    private static final Map<String, Object> NO_ATTRIBUTES =
            Collections.unmodifiableMap(new HashMap<String, Object>(0));

    // -----------------------------------------------------------------
    //
    // Factory methods
    //
    // -----------------------------------------------------------------

    /**
     * Create a Genotype with the given sample name and alleles; every other field keeps its default.
     *
     * @param sampleName the name of the sample
     * @param alleles the alleles of the genotype; null is treated as an empty list
     * @return a new Genotype
     */
    public static Genotype create(final String sampleName, final List<Allele> alleles) {
        return new GenotypeBuilder(sampleName, alleles).make();
    }

    /**
     * Create a Genotype with the given sample name, alleles and extended attributes.
     *
     * @param sampleName the name of the sample
     * @param alleles the alleles of the genotype; null is treated as an empty list
     * @param attributes extended attributes to attach; null is treated as no attributes
     * @return a new Genotype
     */
    public static Genotype create(final String sampleName,
                                        final List<Allele> alleles,
                                        final Map<String, Object> attributes) {
        return new GenotypeBuilder(sampleName, alleles).attributes(attributes).make();
    }

    /**
     * Create a Genotype with the given sample name and alleles, with PL derived from log10 likelihoods.
     *
     * @param sampleName the name of the sample
     * @param alleles the alleles of the genotype; null is treated as an empty list
     * @param gls log10 genotype likelihoods, converted to PLs via {@link #PL(double[])}
     * @return a new Genotype
     */
    protected static Genotype create(final String sampleName,
                                           final List<Allele> alleles,
                                           final double[] gls) {
        return new GenotypeBuilder(sampleName, alleles).PL(gls).make();
    }

    /**
     * Create a new Genotype object for a sample that's missing from the VC (i.e., in
     * the output header).  Every allele is a no call, so ploidy 1 yields {@code .}
     * and ploidy 2 yields {@code ./.}
     *
     * @param sampleName the name of this sample
     * @param ploidy the number of no call alleles the genotype should carry
     * @return an initialized Genotype with sampleName whose {@code ploidy} alleles are all no calls
     * @throws IllegalArgumentException if ploidy is negative (raised by {@link Collections#nCopies})
     */
    public static Genotype createMissing(final String sampleName, final int ploidy) {
        final GenotypeBuilder builder = new GenotypeBuilder(sampleName);
        switch ( ploidy ) {
            // the two common ploidies re-use pre-built shared lists
            case 1:  builder.alleles(HAPLOID_NO_CALL); break;
            case 2:  builder.alleles(DIPLOID_NO_CALL); break;
            default: builder.alleles(Collections.nCopies(ploidy, Allele.NO_CALL)); break;
        }
        return builder.make();
    }

    /**
     * Create a empty builder.  Both a sampleName and alleles must be provided
     * before trying to make a Genotype from this builder.
     */
    public GenotypeBuilder() {}

    /**
     * Create a builder using sampleName.  Alleles must be provided
     * before trying to make a Genotype from this builder.
     * @param sampleName the name of the sample
     */
    public GenotypeBuilder(final String sampleName) {
        name(sampleName);
    }

    /**
     * Make a builder using sampleName and alleles for starting values
     * @param sampleName the name of the sample
     * @param alleles the alleles of the genotype; null is treated as an empty list
     */
    public GenotypeBuilder(final String sampleName, final List<Allele> alleles) {
        name(sampleName);
        alleles(alleles);
    }

    /**
     * Create a new builder starting with the values in Genotype g
     * @param g the genotype to copy values from
     */
    public GenotypeBuilder(final Genotype g) {
        copy(g);
    }

    /**
     * Copy all of the values for this builder from Genotype g.
     *
     * Note that the extended attributes of g are added on top of any attributes
     * already present in this builder (see {@link #attributes(Map)}); every other
     * field is overwritten.
     *
     * @param g the genotype to copy values from
     * @return this builder
     */
    public GenotypeBuilder copy(final Genotype g) {
        name(g.getSampleName());
        alleles(g.getAlleles());
        phased(g.isPhased());
        GQ(g.getGQ());
        DP(g.getDP());
        AD(g.getAD());
        PL(g.getPL());
        filter(g.getFilters());
        attributes(g.getExtendedAttributes());
        return this;
    }

    /**
     * Reset all of the builder attributes to their defaults.  After this
     * function you must provide sampleName and alleles before trying to
     * make more Genotypes.
     *
     * The initial attribute map size set via {@link #maxAttributes(int)} is not reset.
     *
     * @param keepSampleName if true the current sample name is retained, otherwise it is cleared too
     */
    public final void reset(final boolean keepSampleName) {
        if ( ! keepSampleName ) {
            sampleName = null;
        }
        alleles = Collections.emptyList();
        isPhased = false;
        GQ = -1;
        DP = -1;
        AD = null;
        PL = null;
        filters = null;
        extendedAttributes = null;
    }

    /**
     * Create a new Genotype object using the values set in this builder.
     *
     * After creation the values in this builder can be modified and more Genotypes
     * created, although the contents of array values like PL should never be modified
     * inline as they are not copied for efficiency reasons.
     *
     * Note: if attributes are added via this builder after a call to make(), the new Genotype will
     * be modified. Use {@link #makeWithShallowCopy} to safely re-use the same builder object
     * multiple times.
     *
     * @return a newly minted Genotype object with values provided from this builder
     */
    public Genotype make() {
        final Map<String, Object> ea = (extendedAttributes == null) ? NO_ATTRIBUTES : extendedAttributes;
        return new FastGenotype(sampleName, alleles, isPhased, GQ, DP, AD, PL, filters, ea);
    }

    /**
     * Create a new Genotype object using the values set in this builder, and perform a
     * shallow copy of reference types to allow safer re-use of this builder
     *
     * After creation the values in this builder can be modified and more Genotypes
     * created.
     *
     * @return a newly minted Genotype object with values provided from this builder
     */
    public Genotype makeWithShallowCopy() {
        final Map<String, Object> ea = (extendedAttributes == null) ? NO_ATTRIBUTES : new HashMap<>(extendedAttributes);
        final List<Allele> al = new ArrayList<>(alleles);
        final int[] copyAD = (AD == null) ? null : Arrays.copyOf(AD, AD.length);
        final int[] copyPL = (PL == null) ? null : Arrays.copyOf(PL, PL.length);
        return new FastGenotype(sampleName, al, isPhased, GQ, DP, copyAD, copyPL, filters, ea);
    }

    /**
     * Set this genotype's name
     * @param sampleName the name of the sample
     * @return this builder
     */
    public GenotypeBuilder name(final String sampleName) {
        this.sampleName = sampleName;
        return this;
    }

    /**
     * Set this genotype's alleles
     * @param alleles the alleles; null is treated as an empty list. The list is stored as-is, not copied
     * @return this builder
     */
    public GenotypeBuilder alleles(final List<Allele> alleles) {
        if ( alleles == null ) {
            this.alleles = Collections.emptyList();
        } else {
            this.alleles = alleles;
        }
        return this;
    }

    /**
     * Is this genotype phased?
     * @param phased true if the genotype is phased
     * @return this builder
     */
    public GenotypeBuilder phased(final boolean phased) {
        isPhased = phased;
        return this;
    }

    /**
     * This genotype has this GQ value
     * @param GQ the genotype quality; -1 is the value this builder uses for "no GQ"
     * @return this builder
     */
    public GenotypeBuilder GQ(final int GQ) {
        this.GQ = GQ;
        return this;
    }

    /**  Set the GQ with a log10PError value
     *
     * @param pLog10Error the log10 error probability; {@link CommonInfo#NO_LOG10_PERROR} clears GQ,
     *                    any other value is stored as {@code round(-10 * pLog10Error)}
     * @return this builder
     */
    public GenotypeBuilder log10PError(final double pLog10Error) {
        if ( pLog10Error == CommonInfo.NO_LOG10_PERROR ) {
            return noGQ();
        } else {
            return GQ((int)Math.round(pLog10Error * -10));
        }
    }

    /**
     * This genotype has no GQ value
     * @return this builder
     */
    public GenotypeBuilder noGQ() { GQ = -1; return this; }

    /**
     * This genotype has no AD value
     * @return this builder
     */
    public GenotypeBuilder noAD() { AD = null; return this; }

    /**
     * This genotype has no DP value
     * @return this builder
     */
    public GenotypeBuilder noDP() { DP = -1; return this; }

    /**
     * This genotype has no PL value
     * @return this builder
     */
    public GenotypeBuilder noPL() { PL = null; return this; }

    /**
     * This genotype has this DP value
     * @param DP the depth; -1 is the value this builder uses for "no DP"
     * @return this builder
     */
    public GenotypeBuilder DP(final int DP) {
        this.DP = DP;
        return this;
    }

    /**
     * This genotype has this AD value
     * @param AD the allelic depths, or null for none. The array is stored as-is, not copied
     * @return this builder
     */
    public GenotypeBuilder AD(final int[] AD) {
        this.AD = AD;
        return this;
    }

    /**
     * This genotype has this PL value, as int[].  FAST
     * @param PL the PL values, or null for none. The array is stored as-is, not copied
     * @return this builder
     */
    public GenotypeBuilder PL(final int[] PL) {
        this.PL = PL;
        return this;
    }

    /**
     * This genotype has this PL value, converted from double[]. SLOW
     * @param GLs log10 genotype likelihoods to convert into PLs
     * @return this builder
     */
    public GenotypeBuilder PL(final double[] GLs) {
        this.PL = GenotypeLikelihoods.fromLog10Likelihoods(GLs).getAsPLs();
        return this;
    }

    /**
     * This genotype has these attributes. Attributes are added to previous ones.
     *
     * Cannot contain inline attributes (DP, AD, GQ, PL). Note: this is not checked
     *
     * @param attributes the key / value pairs to add; a null map adds nothing
     * @return this builder
     */
    public GenotypeBuilder attributes(final Map<String, Object> attributes) {
        // Tolerate null the same way alleles(null) does, instead of failing with an NPE.
        if ( attributes == null ) {
            return this;
        }
        for ( final Map.Entry<String, Object> pair : attributes.entrySet() ) {
            attribute(pair.getKey(), pair.getValue());
        }
        return this;
    }

    /**
     * Tells this builder to remove all extended attributes
     *
     * @return this builder
     */
    public GenotypeBuilder noAttributes() {
        this.extendedAttributes = null;
        return this;
    }

    /**
     * This genotype has this attribute key / value pair.
     *
     * Cannot contain inline attributes (DP, AD, GQ, PL). Note: this is not checked
     *
     * @param key the attribute key
     * @param value the attribute value; replaces any value previously stored under key
     * @return this builder
     */
    public GenotypeBuilder attribute(final String key, final Object value) {
        // The map is created lazily so builders that never set an attribute allocate nothing.
        if ( extendedAttributes == null ) {
            extendedAttributes = new HashMap<>(initialAttributeMapSize);
        }
        extendedAttributes.put(key, value);
        return this;
    }

    /**
     * Tells this builder to make a Genotype object that has had filters applied,
     * which may be empty (passes) or have some value indicating the reasons
     * why it's been filtered.
     *
     * @param filters non-null list of filters.  empty list =&gt; PASS
     * @return this builder
     */
    public GenotypeBuilder filters(final List<String> filters) {
        if ( filters.isEmpty() ) {
            return filter(null);
        } else if ( filters.size() == 1 ) {
            return filter(filters.get(0));
        } else {
            // multiple filters are stored as one sorted, semicolon-separated string
            return filter(ParsingUtils.join(";", ParsingUtils.sortList(filters)));
        }
    }

    /**
     * varargs version of #filters
     * @param filters the filters to apply; none =&gt; PASS
     * @return this builder
     */
    public GenotypeBuilder filters(final String ... filters) {
        return filters(Arrays.asList(filters));
    }

    /**
     * Most efficient version of setting filters -- just set the filters string to filters
     *
     * @param filter if filters == null or filters.equals("PASS") =&gt; genotype is PASS
     * @return this builder
     */
    public GenotypeBuilder filter(final String filter) {
        this.filters = VCFConstants.PASSES_FILTERS_v4.equals(filter) ? null : filter;
        return this;
    }

    /**
     * This genotype is unfiltered
     *
     * @return this builder
     */
    public GenotypeBuilder unfiltered() {
        return filter(null);
    }

    /**
     * Tells this builder that we have at most these number of attributes.
     *
     * The value is used as the initial capacity of the attribute map the next time
     * that map is created by {@link #attribute(String, Object)}; it has no effect on
     * a map that already exists.
     *
     * @param i the expected maximum number of extended attributes
     * @return this builder
     */
    public GenotypeBuilder maxAttributes(final int i) {
        initialAttributeMapSize = i;
        return this;
    }
}

  • 9
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值