htsjdk库SAMFileHeader类介绍

SAMFileHeader 类是 htsjdk 库中的一个核心类,用于管理和操作 SAM/BAM 文件的头部信息。SAM(Sequence Alignment/Map)文件格式用于存储序列比对数据,而 BAM 文件是其二进制版本。头部信息包含了关于比对数据的重要元数据,包括但不限于以下几个方面:

  1. 版本信息SAMFileHeader 包含 SAM 文件格式的版本信息,这对于解析文件时的兼容性检查非常重要。

  2. 参考序列(Reference Sequences): 头部记录了所有的参考序列信息,包括序列的名称、长度和其他相关信息。这有助于确保比对结果能够正确地映射到参考基因组上。

  3. 程序记录(Program Records): 记录了用于生成或处理数据的程序的信息(例如,程序的名称、版本号、命令行参数等),这有助于追踪数据处理过程。

  4. 样本信息(Read Groups): 头部还可以包含有关样本的元数据,例如样本名称、测序平台、实验条件等。这些信息在进行数据分析时非常有用。

  5. 其它信息SAMFileHeader 还可能包括其他一些自定义的注释或信息字段。

SAMFileHeader 类提供了方法来读取、修改和写入这些头部信息,使得在处理 SAM/BAM 文件时可以灵活地管理这些重要的元数据。常用的方法包括添加和获取参考序列、添加和获取程序记录、添加和获取样本信息等。

源码:

/*
 * The MIT License
 *
 * Copyright (c) 2009 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
package htsjdk.samtools;


import htsjdk.beta.plugin.HtsHeader;
import htsjdk.samtools.util.BufferedLineReader;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.Log;

import java.io.StringWriter;
import java.util.*;
import java.util.function.Supplier;

/**
 * Header information from a SAM or BAM file.
 */
public class SAMFileHeader extends AbstractSAMHeaderRecord implements HtsHeader
{
    public static final String VERSION_TAG = "VN";
    public static final String SORT_ORDER_TAG = "SO";
    public static final String GROUP_ORDER_TAG = "GO";
    public static final String CURRENT_VERSION = "1.6";
    public static final Set<String> ACCEPTABLE_VERSIONS = CollectionUtil.makeSet("1.0", "1.3", "1.4", "1.5", CURRENT_VERSION );

    private SortOrder sortOrder = null;
    private GroupOrder groupOrder = null;

    private static final Log log = Log.getInstance(SAMFileHeader.class);
    /**
     * These tags are of known type, so don't need a type field in the text representation.
     */
    public static final Set<String> STANDARD_TAGS =
            new HashSet<>(Arrays.asList(VERSION_TAG, SORT_ORDER_TAG, GROUP_ORDER_TAG));

    @Override
    Set<String> getStandardTags() {
        return STANDARD_TAGS;
    }

    /**
     * Ways in which a SAM or BAM may be sorted.
     */
    public enum SortOrder {
        unsorted(() -> null),
        queryname(SAMRecordQueryNameComparator::new),
        coordinate(SAMRecordCoordinateComparator::new),
        duplicate(SAMRecordDuplicateComparator::new), // NB: this is not in the SAM spec!
        unknown(() -> null);

        private final Supplier<SAMRecordComparator> comparatorSupplier;

        SortOrder(final Supplier<SAMRecordComparator> comparatorClass) {
            this.comparatorSupplier = comparatorClass;
        }

        /**
         * @return Comparator class to sort in the specified order, or null if unsorted.
         * @deprecated since 7/2018, use {@link #getComparatorInstance()} with {@link SAMSortOrderChecker#getClass()} instead
         */
        @Deprecated
        public Class<? extends SAMRecordComparator> getComparator() {
            return comparatorSupplier.get().getClass();
        }

        /**
         * @return Comparator to sort in the specified order, or null if unsorted.
         */
        public SAMRecordComparator getComparatorInstance() {
            return comparatorSupplier.get();
        }
    }

    public enum GroupOrder {
        none, query, reference
    }

    private List<SAMReadGroupRecord> mReadGroups = new ArrayList<>();
    private List<SAMProgramRecord> mProgramRecords = new ArrayList<>();
    private final Map<String, SAMReadGroupRecord> mReadGroupMap = new HashMap<>();
    private final Map<String, SAMProgramRecord> mProgramRecordMap = new HashMap<>();
    private SAMSequenceDictionary mSequenceDictionary = new SAMSequenceDictionary();
    final private List<String> mComments = new ArrayList<>();
    private final List<SAMValidationError> mValidationErrors = new ArrayList<>();

    public SAMFileHeader() {
        setAttribute(VERSION_TAG, CURRENT_VERSION);
    }

    /** Constructor that initializes the sequence dictionary with the provided one. */
    public SAMFileHeader(final SAMSequenceDictionary dict) {
        this();
        setSequenceDictionary(dict);
    }

    public String getVersion() {
        return getAttribute(VERSION_TAG);
    }

    public String getCreator() {
        return getAttribute("CR");
    }

    public SAMSequenceDictionary getSequenceDictionary() {
        return mSequenceDictionary;
    }

    public List<SAMReadGroupRecord> getReadGroups() {
        return Collections.unmodifiableList(mReadGroups);
    }

    /**
     * Look up sequence record by name.
     * @return sequence record if it's found by name,
     * or null if sequence dictionary is empty or if the sequence is not found in the dictionary.
     */
    public SAMSequenceRecord getSequence(final String name) {
        return mSequenceDictionary == null ? null : mSequenceDictionary.getSequence(name);
    }

    /**
     * Look up read group record by name.
     */
    public SAMReadGroupRecord getReadGroup(final String name) {
        return mReadGroupMap.get(name);
    }

    /**
     * Replace entire sequence dictionary.  The given sequence dictionary is stored, not copied.
     */
    public void setSequenceDictionary(final SAMSequenceDictionary sequenceDictionary) {
        mSequenceDictionary = sequenceDictionary;
    }

    public void addSequence(final SAMSequenceRecord sequenceRecord) {
        mSequenceDictionary.addSequence(sequenceRecord);
    }

    /**
     * Look up a sequence record by index.  First sequence in the header is the 0th.
     * @return The corresponding sequence record, or null if the index is out of range.
     */
    public SAMSequenceRecord getSequence(final int sequenceIndex) {
        return mSequenceDictionary.getSequence(sequenceIndex);
    }

    /**
     *
     * @return Sequence index for the given sequence name, or -1 if the name is not found.
     */
    public int getSequenceIndex(final String sequenceName) {
        return mSequenceDictionary.getSequenceIndex(sequenceName);
    }

    /**
     * Replace entire list of read groups.  The given list is stored, not copied.
     */
    public void setReadGroups(final List<SAMReadGroupRecord> readGroups) {
        mReadGroups = readGroups;
        mReadGroupMap.clear();
        for (final SAMReadGroupRecord readGroupRecord : readGroups) {
            mReadGroupMap.put(readGroupRecord.getReadGroupId(), readGroupRecord);
        }
    }

    public void addReadGroup(final SAMReadGroupRecord readGroup) {
        if (mReadGroupMap.containsKey(readGroup.getReadGroupId())) {
            throw new IllegalArgumentException("Read group with group id " +
                readGroup.getReadGroupId() + " already exists in SAMFileHeader!");
        }
        mReadGroups.add(readGroup);
        mReadGroupMap.put(readGroup.getReadGroupId(), readGroup);
    }

    public List<SAMProgramRecord> getProgramRecords() {
        return Collections.unmodifiableList(mProgramRecords);
    }

    public void addProgramRecord(final SAMProgramRecord programRecord) {
        if (mProgramRecordMap.containsKey(programRecord.getProgramGroupId())) {
            throw new IllegalArgumentException("Program record with group id " +
                programRecord.getProgramGroupId() + " already exists in SAMFileHeader!");
        }
        this.mProgramRecords.add(programRecord);
        this.mProgramRecordMap.put(programRecord.getProgramGroupId(), programRecord);
    }

    public SAMProgramRecord getProgramRecord(final String pgId) {
        return this.mProgramRecordMap.get(pgId);
    }

    /**
     * Replace entire list of program records
     * @param programRecords This list is used directly, not copied.
     */
    public void setProgramRecords(final List<SAMProgramRecord> programRecords) {
        this.mProgramRecords = programRecords;
        this.mProgramRecordMap.clear();
        for (final SAMProgramRecord programRecord : this.mProgramRecords) {
            this.mProgramRecordMap.put(programRecord.getProgramGroupId(), programRecord);
        }
    }

    /**
     * @return a new SAMProgramRecord with an ID guaranteed to not exist in this SAMFileHeader
     */
    public SAMProgramRecord createProgramRecord() {
        for (int i = 0; i < Integer.MAX_VALUE; ++i) {
            final String s = Integer.toString(i);
            if (!this.mProgramRecordMap.containsKey(s)) {
                final SAMProgramRecord ret = new SAMProgramRecord(s);
                addProgramRecord(ret);
                return ret;
            }
        }
        throw new IllegalStateException("Surprising number of SAMProgramRecords");
    }

    public SortOrder getSortOrder() {
        if (sortOrder == null) {
            final String so = getAttribute(SORT_ORDER_TAG);
            if (so == null) {
                sortOrder = SortOrder.unsorted;
            } else {
                try {
                    return SortOrder.valueOf(so);
                } catch (IllegalArgumentException e) {
                    log.warn("Found non-conforming header SO tag: " + so + ". Treating as 'unknown'.");
                    sortOrder = SortOrder.unknown;
                }
            }
        }
        return sortOrder;
    }

    public void setSortOrder(final SortOrder so) {
        sortOrder = so;
        super.setAttribute(SORT_ORDER_TAG, so.name());
    }

    public GroupOrder getGroupOrder() {
        if (groupOrder == null) {
            final String go = getAttribute(GROUP_ORDER_TAG);
            if (go == null) {
                groupOrder = GroupOrder.none;
            } else {
                try {
                    return GroupOrder.valueOf(go);
                } catch (IllegalArgumentException e) {
                    log.warn("Found non conforming header GO tag: " + go + ". Treating as 'none'.");
                    groupOrder = GroupOrder.none;
                }
            }
        }
        return groupOrder;
    }

    public void setGroupOrder(final GroupOrder go) {
        groupOrder = go;
        super.setAttribute(GROUP_ORDER_TAG, go.name());
    }


    /**
     * Set the given value for the attribute named 'key'.  Replaces an existing value, if any.
     * If value is null, the attribute is removed.
     * Otherwise, the value will be converted to a String with toString.
     * @param key attribute name
     * @param value attribute value
     * @deprecated Use {@link #setAttribute(String, String) instead
     */
    @Deprecated
    @Override
    public void setAttribute(final String key, final Object value) {
        if (key.equals(SORT_ORDER_TAG) || key.equals(GROUP_ORDER_TAG)) {
            this.setAttribute(key, value.toString());
        } else {
            super.setAttribute(key, value);
        }
    }

    /**
     * Set the given value for the attribute named 'key'.  Replaces an existing value, if any.
     * If value is null, the attribute is removed.
     * @param key attribute name
     * @param value attribute value
     */
    @Override
    public void setAttribute(final String key, final String value) {
        String tempVal = value;
        if (key.equals(SORT_ORDER_TAG)) {
            this.sortOrder = null;
            try {
                tempVal = SortOrder.valueOf(value).toString();
            } catch (IllegalArgumentException e) {
                tempVal = SortOrder.unknown.toString();
            }
        } else if (key.equals(GROUP_ORDER_TAG)) {
            this.groupOrder = null;
        }
        super.setAttribute(key, tempVal);
    }

    /** @deprecated since May 1st 2019 - text version of header is no longer stored. */
    @Deprecated public String getTextHeader() {  return null; }

    /** @deprecated since May 1st 2019 - text version of header is no longer stored. */
    @Deprecated public void setTextHeader(final String textHeader) { }

    public List<String> getComments() {
        return Collections.unmodifiableList(mComments);
    }

    public void addComment(String comment) {
        if (!comment.startsWith(SAMTextHeaderCodec.COMMENT_PREFIX)) {
            comment = SAMTextHeaderCodec.COMMENT_PREFIX + comment;
        }
        mComments.add(comment);
    }


    /**
     * Replace existing comments with the contents of the given collection.
     */
    public void setComments(final Collection<String> comments) {
        mComments.clear();
        for (final String comment : comments) {
            addComment(comment);
        }
    }

    public List<SAMValidationError> getValidationErrors() {
        return Collections.unmodifiableList(mValidationErrors);
    }

    public void addValidationError(final SAMValidationError error) {
        mValidationErrors.add(error);
    }

    /**
     * Replace list of validation errors with the elements of the given list.
     */
    public void setValidationErrors(final Collection<SAMValidationError> errors) {
        mValidationErrors.clear();
        mValidationErrors.addAll(errors);
    }

    @Override
    public boolean equals(final Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        final SAMFileHeader that = (SAMFileHeader) o;

        if (!attributesEqual(that)) return false;
        if (mProgramRecords != null ? !mProgramRecords.equals(that.mProgramRecords) : that.mProgramRecords != null)
            return false;
        if (mReadGroups != null ? !mReadGroups.equals(that.mReadGroups) : that.mReadGroups != null) return false;
        if (mSequenceDictionary != null ? !mSequenceDictionary.equals(that.mSequenceDictionary) : that.mSequenceDictionary != null)
            return false;

        return true;
    }

    @Override
    public int hashCode() {
        int result = attributesHashCode();
        result = 31 * result + (mSequenceDictionary != null ? mSequenceDictionary.hashCode() : 0);
        result = 31 * result + (mReadGroups != null ? mReadGroups.hashCode() : 0);
        result = 31 * result + (mProgramRecords != null ? mProgramRecords.hashCode() : 0);
        return result;
    }

    @Override
    public final SAMFileHeader clone() {
        final SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
        codec.setValidationStringency(ValidationStringency.SILENT);
        return codec.decode(BufferedLineReader.fromString(getSAMString()), "SAMFileHeader.clone");
    }

    @Override
    public String getSAMString() {
        final StringWriter stringWriter = new StringWriter();
        new SAMTextHeaderCodec().encode(stringWriter, this);
        return stringWriter.toString();
    }

    /** Little class to generate program group IDs */
    public static class PgIdGenerator {
        private int recordCounter;

        private final Set<String> idsThatAreAlreadyTaken = new HashSet<>();

        public PgIdGenerator(final SAMFileHeader header) {
            for (final SAMProgramRecord pgRecord : header.getProgramRecords()) {
                idsThatAreAlreadyTaken.add(pgRecord.getProgramGroupId());
            }
            recordCounter = idsThatAreAlreadyTaken.size();
        }

        public String getNonCollidingId(final String recordId) {
            if (!idsThatAreAlreadyTaken.contains(recordId)) {
                // don't remap 1st record. If there are more records
                // with this id, they will be remapped in the 'else'.
                idsThatAreAlreadyTaken.add(recordId);
                ++recordCounter;
                return recordId;
            } else {
                String newId;
                // Below we tack on one of roughly 1.7 million possible 4 digit base36 at random. We do this because
                // our old process of just counting from 0 upward and adding that to the previous id led to 1000s of
                // calls idsThatAreAlreadyTaken.contains() just to resolve 1 collision when merging 1000s of similarly
                // processed bams.
                while (idsThatAreAlreadyTaken.contains(newId = recordId + "." + SamFileHeaderMerger.positiveFourDigitBase36Str(recordCounter++)))
                    ;

                idsThatAreAlreadyTaken.add(newId);
                return newId;
            }
        }
    }
}

  • 3
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
使用GATK的combinegvcf模块合并gvcf文件,可是到了这一步Using GATK jar /stor9000/apps/users/NWSUAF/2022050434/biosoft/gatk4.3/gatk-4.3.0.0/gatk-package-4.3.0.0-local.jar Running: java -Dsamjdk.use_async_io_read_samtools=false -Dsamjdk.use_async_io_write_samtools=true -Dsamjdk.use_async_io_write_tribble=false -Dsamjdk.compression_level=2 -jar /stor9000/apps/users/NWSUAF/2022050434/biosoft/gatk4.3/gatk-4.3.0.0/gatk-package-4.3.0.0-local.jar CombineGVCFs -R /stor9000/apps/users/NWSUAF/2008115251/genomes/ARS-UCD1.2_Btau5.0.1Y.fa --variant /stor9000/apps/users/NWSUAF/2020055419/home/xncattle/03.GVCF/01_out_GVCF/XN_22/1_XN_22.g.vcf.gz --variant /stor9000/apps/users/NWSUAF/2020055419/home/xncattle/03.GVCF/01_out_GVCF/XN_18/1_XN_18.g.vcf.gz -O /stor9000/apps/users/NWSUAF/2022050469/candy/bwa/gatk/Combine/chr1.g.vcf.gz 09:10:40.524 INFO NativeLibraryLoader - Loading libgkl_compression.so from jar:file:/stor9000/apps/users/NWSUAF/2022050434/biosoft/gatk4.3/gatk-4.3.0.0/gatk-package-4.3.0.0-local.jar!/com/intel/gkl/native/libgkl_compression.so 09:10:50.696 INFO CombineGVCFs - ------------------------------------------------------------ 09:10:50.697 INFO CombineGVCFs - The Genome Analysis Toolkit (GATK) v4.3.0.0 09:10:50.697 INFO CombineGVCFs - For support and documentation go to https://software.broadinstitute.org/gatk/ 09:10:50.698 INFO CombineGVCFs - Executing as 2022050469@node54 on Linux v3.10.0-1127.el7.x86_64 amd64 09:10:50.698 INFO CombineGVCFs - Java runtime: Java HotSpot(TM) 64-Bit Server VM v1.8.0_72-b15 09:10:50.698 INFO CombineGVCFs - Start Date/Time: July 21, 2023 9:10:40 AM CST 09:10:50.698 INFO CombineGVCFs - ------------------------------------------------------------ 09:10:50.698 INFO CombineGVCFs - ------------------------------------------------------------ 09:10:50.698 INFO CombineGVCFs - HTSJDK Version: 3.0.1 09:10:50.699 INFO CombineGVCFs - Picard Version: 2.27.5 09:10:50.699 INFO CombineGVCFs - Built for Spark Version: 2.4.5 09:10:50.699 INFO CombineGVCFs - HTSJDK Defaults.COMPRESSION_LEVEL : 2 09:10:50.699 INFO CombineGVCFs - HTSJDK Defa就停止了,没有输出文件,也没有报错文件
07-22
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值