Adam学习之6代码解读kmer.scala(附源码)

更多代码请见:https://github.com/xubo245/SparkLearning


Adam学习之6源码解读kmer.scala

代码:

package testAdam
import org.apache.spark._
import org.bdgenomics.adam.rdd.ADAMContext
import org.bdgenomics.adam.projections.{AlignmentRecordField, Projection}
/**
 * Counts the k-mers (k = 21) found in the read sequences of an ADAM
 * alignment file and prints the ten most frequent ones as (count, k-mer) pairs.
 *
 * Usage: kmer [inputPath]
 * When no argument is given the default HDFS path below is used; "Master"
 * in that path is a placeholder and must be replaced by the real host name or IP.
 */
object kmer {
  /** Input used when no path is passed on the command line. */
  private val DefaultInput = "hdfs://Master:9000/xubo/adam/output/small.adam"

  /** Length of the sliding window applied to every read sequence. */
  private val KmerLength = 21

  /** Number of most frequent k-mers to print. */
  private val TopN = 10

  /**
   * Entry point.
   *
   * @param args optional; args(0) overrides the default input path
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("test Adam kmer").setMaster("local")
    // val conf = new SparkConf().setAppName("test Adam kmer")
    val sc = new SparkContext(conf)
    try {
      val ac = new ADAMContext(sc)
      // Other inputs used while experimenting:
      //   /data/NA21144.chrom11.ILLUMINA.adam
      //   /xubo/adam/output/small.adam
      val inputPath = args.headOption.getOrElse(DefaultInput)

      // Load alignments from disk, requesting only the projected fields.
      val reads = ac.loadAlignments(
        inputPath,
        projection = Some(
          Projection(
            AlignmentRecordField.sequence,
            AlignmentRecordField.readMapped,
            AlignmentRecordField.mapq
          )
        )
      )

      // Generate, count and sort 21-mers: emit (k-mer, 1) for every window,
      // sum per k-mer, then swap to (count, k-mer) so sortByKey orders by count.
      val kmers = reads
        .flatMap(_.getSequence.sliding(KmerLength).map(k => (k, 1L)))
        .reduceByKey(_ + _)
        .map(_.swap)
        .sortByKey(ascending = false)

      // Print the top 10 most common 21-mers
      kmers.take(TopN).foreach(println)
    } finally {
      // Release the Spark context even if loading or counting fails.
      sc.stop()
    }
  }
}
注意:代码中HDFS路径里的 Master 需要改成真实的主机名或IP。


源码解读:

1.loadAlignments:loadAlignments是ADAMContext的函数

val reads = ac.loadAlignments("hdfs://Master:9000/xubo/adam/output/small.adam",
  projection = Some(
    Projection(
      AlignmentRecordField.sequence,
      AlignmentRecordField.readMapped,
      AlignmentRecordField.mapq
    )
  )
)


loadAlignments源码:

  /**
   * Loads alignment records from `filePath`, picking the loader from the
   * path's suffix.
   *
   * Suffixes are checked in this order: .sam/.bam, .ifq, .fq/.fastq,
   * .fa/.fasta, "contig.adam"; any other path is read as Parquet of
   * AlignmentRecords.
   *
   * @param filePath       path of the input file; its suffix selects the loader
   * @param projection     forwarded only to the Parquet AlignmentRecord loader;
   *                       the other branches log that it is ignored
   * @param filePath2Opt   forwarded only to the FASTQ loader
   * @param recordGroupOpt forwarded only to the FASTQ loader
   * @param stringency     forwarded only to the FASTQ loader
   * @return the loaded records as an RDD of AlignmentRecord
   */
  def loadAlignments(
    filePath: String,
    projection: Option[Schema] = None,
    filePath2Opt: Option[String] = None,
    recordGroupOpt: Option[String] = None,
    stringency: ValidationStringency = ValidationStringency.STRICT): RDD[AlignmentRecord] = LoadAlignmentRecords.time {

    // True when the path ends with at least one of the given suffixes.
    def hasSuffix(suffixes: String*): Boolean =
      suffixes.exists(suffix => filePath.endsWith(suffix))

    filePath match {
      case _ if hasSuffix(".sam", ".bam") =>
        log.info("Loading " + filePath + " as SAM/BAM and converting to AlignmentRecords. Projection is ignored.")
        loadBam(filePath)

      case _ if hasSuffix(".ifq") =>
        log.info("Loading " + filePath + " as interleaved FASTQ and converting to AlignmentRecords. Projection is ignored.")
        loadInterleavedFastq(filePath)

      case _ if hasSuffix(".fq", ".fastq") =>
        log.info("Loading " + filePath + " as unpaired FASTQ and converting to AlignmentRecords. Projection is ignored.")
        loadFastq(filePath, filePath2Opt, recordGroupOpt, stringency)

      case _ if hasSuffix(".fa", ".fasta") =>
        log.info("Loading " + filePath + " as FASTA and converting to AlignmentRecords. Projection is ignored.")
        // Imported here so that `.toReads` resolves on the loaded contig fragments.
        import ADAMContext._
        loadFasta(filePath, fragmentLength = 10000).toReads

      case _ if hasSuffix("contig.adam") =>
        log.info("Loading " + filePath + " as Parquet of NucleotideContigFragment and converting to AlignmentRecords. Projection is ignored.")
        loadParquet[NucleotideContigFragment](filePath).toReads

      case _ =>
        // Fallback: anything else is read as Parquet AlignmentRecords, honouring the projection.
        log.info("Loading " + filePath + " as Parquet of AlignmentRecords.")
        loadParquetAlignments(filePath, None, projection)
    }
  }


读取结果:

scala> reads.foreach(println)
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 60, "readName": null, "sequence": "GTATAAGAGCAGCCTTATTCCTATTTATAATCAGGGTGAAACACCTGTGCCAATGCCAAGACAGGGGTGCCAAGA", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 60, "readName": null, "sequence": "CTTTATTTTTATTTTTAAGGTTTTTTTTGTTTGTTTGTTTTGAGATGGAGTCTCGCTCCACCGCCCAGACTGGAG", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 60, "readName": null, "sequence": "TGTATCTTCCTCCCCTGCTGTATGTTTCCTGCCCTCAAACATCACACTCCACGTTCTTCAGCTTTAGGACTTGGA", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 60, "readName": null, "sequence": "TTTAATAAATGTTGATTGTCCTATTTAATTATTCTCAACTTTCCGATTTTATTTCCCATGTAACAGTGTTGTTTT", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 60, "readName": null, "sequence": "TAAAATGCCCCCATCTTCCCAGAGCTGCCAGCCCTCACAATGCCAACAGCTAAATGTACCCAAGTGTTACTGAAC", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 24, "readName": null, "sequence": "TACAGGCACCCACCATCATGCCCAGCTAATTTTTGTATTTTTGTAGAAACGGGGTTTCACCATGTTGGCCCAGCT", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 60, "readName": null, "sequence": "GCTCACTGCAGCCTCAACCTCCTGGGCCCAAGTGATTTCATCTTATTTTTGGAAAAAAAAACAAACTAAACCAAA", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 28, "readName": null, "sequence": "TTTCTTTTTCTTTCTTTCTTTCTTTCTTTCTTTTTCTTTCTTTCTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCT", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 60, "readName": null, "sequence": "TCATGTAGCATGCATATGGCTAACGGCAAAGTGAGGGAGGAATAATTATAGTAATAATCACAGTGATGACGTGGA", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 60, "readName": null, "sequence": "GCTCAGGCCTTGCAAGAATCTCTACTGCCCAACAAGTCCCTACAAGATGGCATTTAAAAGCAGTCCCTCACGCAC", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 60, "readName": null, "sequence": "CCTAGAGAAGCTCCCACTAGGGCTGCAGTCAATTCCCAGGTCTTAGGTGCTGAGCAGTGGGAGGTGGTGGCCATG", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 60, "readName": null, "sequence": "AAATAAAGTTTGGCTTTCAGTTGTAACTTTGAATATCTTTATCACAGTTATTTAAAGCCTTTAAAAAGCTTTAAT", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 60, "readName": null, "sequence": "TGTGTAACTAACATAATTGGCACTGTCCCTGTAAATTCAAATTGGATATCCTCCCAAATTTTATTTAAGCAATTG", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 60, "readName": null, "sequence": "TTTATTTTTTGAGCATGAAAGTAATATATGCTCAGTGTAAACAATTAGGTCATTATAAATATATTTAACAGGAAT", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 35, "readName": null, "sequence": "CTCAGGTGATCCACCCGCCTCGGCCTCCCAAAGTGCTGGGACTACAGGCATGAGGCACCGCGCCTGGCCAGGACT", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 60, "readName": null, "sequence": "GACAAGATAGTACTTGAGCTAAGCCTTGCAGGTTGAGTAGGATTATTCTAGTGGAATTTAGGGAAACGATGTGCA", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 60, "readName": null, "sequence": "CTACTCTCATTGACTGTTCAATGCCTATACAAGTAAAACTTTACCAGCACCCAAGTCAAAAAGAAAAAAAAGGGG", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 60, "readName": null, "sequence": "CTCATTCTCTCTCCTGCTGCACTGTGAAGAGGTGCCTGTTGCCAAGAGTATAAGTTTCCTGAGGCCTCCCAGGCC", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 60, "readName": null, "sequence": "AAATTAAACAGCTCGTTTAACTGATAATCCATACTATATTTGAGTAGGGCTGTCACATGGTTGGAACCTCCGGTT", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": null, "start": null, "oldPosition": null, "end": null, "mapq": 40, "readName": null, "sequence": "AGACTGGGTCTCACTATGTTGCCTAGGCTGGTCTCAAACTCCTGGGCTCAAGTGATCCATCTCTGCCTTCCAAAG", "qual": null, "cigar": null, "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": null, "recordGroupName": null, "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": null, "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}



2.getSequence:从1中读取到的数据里取出Sequence;暂时没看到getSequence的源码,它在Avro中,还没下载


scala> val a0=reads.map(_.getSequence)
a0: org.apache.spark.rdd.RDD[String] = MapPartitionsRDD[9] at map at <console>:27

scala> a0.foreach(println)
GTATAAGAGCAGCCTTATTCCTATTTATAATCAGGGTGAAACACCTGTGCCAATGCCAAGACAGGGGTGCCAAGA
CTTTATTTTTATTTTTAAGGTTTTTTTTGTTTGTTTGTTTTGAGATGGAGTCTCGCTCCACCGCCCAGACTGGAG
TGTATCTTCCTCCCCTGCTGTATGTTTCCTGCCCTCAAACATCACACTCCACGTTCTTCAGCTTTAGGACTTGGA
TTTAATAAATGTTGATTGTCCTATTTAATTATTCTCAACTTTCCGATTTTATTTCCCATGTAACAGTGTTGTTTT
TAAAATGCCCCCATCTTCCCAGAGCTGCCAGCCCTCACAATGCCAACAGCTAAATGTACCCAAGTGTTACTGAAC
TACAGGCACCCACCATCATGCCCAGCTAATTTTTGTATTTTTGTAGAAACGGGGTTTCACCATGTTGGCCCAGCT
GCTCACTGCAGCCTCAACCTCCTGGGCCCAAGTGATTTCATCTTATTTTTGGAAAAAAAAACAAACTAAACCAAA
TTTCTTTTTCTTTCTTTCTTTCTTTCTTTCTTTTTCTTTCTTTCTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCT
TCATGTAGCATGCATATGGCTAACGGCAAAGTGAGGGAGGAATAATTATAGTAATAATCACAGTGATGACGTGGA
GCTCAGGCCTTGCAAGAATCTCTACTGCCCAACAAGTCCCTACAAGATGGCATTTAAAAGCAGTCCCTCACGCAC
CCTAGAGAAGCTCCCACTAGGGCTGCAGTCAATTCCCAGGTCTTAGGTGCTGAGCAGTGGGAGGTGGTGGCCATG
AAATAAAGTTTGGCTTTCAGTTGTAACTTTGAATATCTTTATCACAGTTATTTAAAGCCTTTAAAAAGCTTTAAT
TGTGTAACTAACATAATTGGCACTGTCCCTGTAAATTCAAATTGGATATCCTCCCAAATTTTATTTAAGCAATTG
TTTATTTTTTGAGCATGAAAGTAATATATGCTCAGTGTAAACAATTAGGTCATTATAAATATATTTAACAGGAAT
CTCAGGTGATCCACCCGCCTCGGCCTCCCAAAGTGCTGGGACTACAGGCATGAGGCACCGCGCCTGGCCAGGACT
GACAAGATAGTACTTGAGCTAAGCCTTGCAGGTTGAGTAGGATTATTCTAGTGGAATTTAGGGAAACGATGTGCA
CTACTCTCATTGACTGTTCAATGCCTATACAAGTAAAACTTTACCAGCACCCAAGTCAAAAAGAAAAAAAAGGGG
CTCATTCTCTCTCCTGCTGCACTGTGAAGAGGTGCCTGTTGCCAAGAGTATAAGTTTCCTGAGGCCTCCCAGGCC
AAATTAAACAGCTCGTTTAACTGATAATCCATACTATATTTGAGTAGGGCTGTCACATGGTTGGAACCTCCGGTT
AGACTGGGTCTCACTATGTTGCCTAGGCTGGTCTCAAACTCCTGGGCTCAAGTGATCCATCTCTGCCTTCCAAAG


3. sliding: 对2中的每条序列用长度为21的滑动窗口进行切分;sliding在这里作用于序列字符串,返回的是Iterator[String]。

scala> val a1=reads.map(_.getSequence.sliding(21))
a1: org.apache.spark.rdd.RDD[Iterator[String]] = MapPartitionsRDD[10] at map at <console>:27

scala> for(i<-a1){ while(i.hasNext){print(i.next()+" ")} ;println()}
GTATAAGAGCAGCCTTATTCC TATAAGAGCAGCCTTATTCCT ATAAGAGCAGCCTTATTCCTA TAAGAGCAGCCTTATTCCTAT AAGAGCAGCCTTATTCCTATT AGAGCAGCCTTATTCCTATTT GAGCAGCCTTATTCCTATTTA AGCAGCCTTATTCCTATTTAT GCAGCCTTATTCCTATTTATA CAGCCTTATTCCTATTTATAA AGCCTTATTCCTATTTATAAT GCCTTATTCCTATTTATAATC CCTTATTCCTATTTATAATCA CTTATTCCTATTTATAATCAG TTATTCCTATTTATAATCAGG TATTCCTATTTATAATCAGGG ATTCCTATTTATAATCAGGGT TTCCTATTTATAATCAGGGTG TCCTATTTATAATCAGGGTGA CCTATTTATAATCAGGGTGAA CTATTTATAATCAGGGTGAAA TATTTATAATCAGGGTGAAAC ATTTATAATCAGGGTGAAACA TTTATAATCAGGGTGAAACAC TTATAATCAGGGTGAAACACC TATAATCAGGGTGAAACACCT ATAATCAGGGTGAAACACCTG TAATCAGGGTGAAACACCTGT AATCAGGGTGAAACACCTGTG ATCAGGGTGAAACACCTGTGC TCAGGGTGAAACACCTGTGCC CAGGGTGAAACACCTGTGCCA AGGGTGAAACACCTGTGCCAA GGGTGAAACACCTGTGCCAAT GGTGAAACACCTGTGCCAATG GTGAAACACCTGTGCCAATGC TGAAACACCTGTGCCAATGCC GAAACACCTGTGCCAATGCCA AAACACCTGTGCCAATGCCAA AACACCTGTGCCAATGCCAAG ACACCTGTGCCAATGCCAAGA CACCTGTGCCAATGCCAAGAC ACCTGTGCCAATGCCAAGACA CCTGTGCCAATGCCAAGACAG CTGTGCCAATGCCAAGACAGG TGTGCCAATGCCAAGACAGGG GTGCCAATGCCAAGACAGGGG TGCCAATGCCAAGACAGGGGT GCCAATGCCAAGACAGGGGTG CCAATGCCAAGACAGGGGTGC CAATGCCAAGACAGGGGTGCC AATGCCAAGACAGGGGTGCCA ATGCCAAGACAGGGGTGCCAA TGCCAAGACAGGGGTGCCAAG GCCAAGACAGGGGTGCCAAGA 
CTTTATTTTTATTTTTAAGGT TTTATTTTTATTTTTAAGGTT TTATTTTTATTTTTAAGGTTT TATTTTTATTTTTAAGGTTTT ATTTTTATTTTTAAGGTTTTT TTTTTATTTTTAAGGTTTTTT TTTTATTTTTAAGGTTTTTTT TTTATTTTTAAGGTTTTTTTT TTATTTTTAAGGTTTTTTTTG TATTTTTAAGGTTTTTTTTGT ATTTTTAAGGTTTTTTTTGTT TTTTTAAGGTTTTTTTTGTTT TTTTAAGGTTTTTTTTGTTTG TTTAAGGTTTTTTTTGTTTGT TTAAGGTTTTTTTTGTTTGTT TAAGGTTTTTTTTGTTTGTTT AAGGTTTTTTTTGTTTGTTTG AGGTTTTTTTTGTTTGTTTGT GGTTTTTTTTGTTTGTTTGTT GTTTTTTTTGTTTGTTTGTTT TTTTTTTTGTTTGTTTGTTTT TTTTTTTGTTTGTTTGTTTTG TTTTTTGTTTGTTTGTTTTGA TTTTTGTTTGTTTGTTTTGAG TTTTGTTTGTTTGTTTTGAGA TTTGTTTGTTTGTTTTGAGAT TTGTTTGTTTGTTTTGAGATG TGTTTGTTTGTTTTGAGATGG GTTTGTTTGTTTTGAGATGGA TTTGTTTGTTTTGAGATGGAG TTGTTTGTTTTGAGATGGAGT TGTTTGTTTTGAGATGGAGTC GTTTGTTTTGAGATGGAGTCT TTTGTTTTGAGATGGAGTCTC TTGTTTTGAGATGGAGTCTCG TGTTTTGAGATGGAGTCTCGC GTTTTGAGATGGAGTCTCGCT TTTTGAGATGGAGTCTCGCTC TTTGAGATGGAGTCTCGCTCC TTGAGATGGAGTCTCGCTCCA TGAGATGGAGTCTCGCTCCAC GAGATGGAGTCTCGCTCCACC AGATGGAGTCTCGCTCCACCG GATGGAGTCTCGCTCCACCGC ATGGAGTCTCGCTCCACCGCC TGGAGTCTCGCTCCACCGCCC GGAGTCTCGCTCCACCGCCCA GAGTCTCGCTCCACCGCCCAG AGTCTCGCTCCACCGCCCAGA GTCTCGCTCCACCGCCCAGAC TCTCGCTCCACCGCCCAGACT CTCGCTCCACCGCCCAGACTG TCGCTCCACCGCCCAGACTGG CGCTCCACCGCCCAGACTGGA GCTCCACCGCCCAGACTGGAG 
TGTATCTTCCTCCCCTGCTGT GTATCTTCCTCCCCTGCTGTA TATCTTCCTCCCCTGCTGTAT ATCTTCCTCCCCTGCTGTATG TCTTCCTCCCCTGCTGTATGT CTTCCTCCCCTGCTGTATGTT TTCCTCCCCTGCTGTATGTTT TCCTCCCCTGCTGTATGTTTC CCTCCCCTGCTGTATGTTTCC CTCCCCTGCTGTATGTTTCCT TCCCCTGCTGTATGTTTCCTG CCCCTGCTGTATGTTTCCTGC CCCTGCTGTATGTTTCCTGCC CCTGCTGTATGTTTCCTGCCC CTGCTGTATGTTTCCTGCCCT TGCTGTATGTTTCCTGCCCTC GCTGTATGTTTCCTGCCCTCA CTGTATGTTTCCTGCCCTCAA TGTATGTTTCCTGCCCTCAAA GTATGTTTCCTGCCCTCAAAC TATGTTTCCTGCCCTCAAACA ATGTTTCCTGCCCTCAAACAT TGTTTCCTGCCCTCAAACATC GTTTCCTGCCCTCAAACATCA TTTCCTGCCCTCAAACATCAC TTCCTGCCCTCAAACATCACA TCCTGCCCTCAAACATCACAC CCTGCCCTCAAACATCACACT CTGCCCTCAAACATCACACTC TGCCCTCAAACATCACACTCC GCCCTCAAACATCACACTCCA CCCTCAAACATCACACTCCAC CCTCAAACATCACACTCCACG CTCAAACATCACACTCCACGT TCAAACATCACACTCCACGTT CAAACATCACACTCCACGTTC AAACATCACACTCCACGTTCT AACATCACACTCCACGTTCTT ACATCACACTCCACGTTCTTC CATCACACTCCACGTTCTTCA ATCACACTCCACGTTCTTCAG TCACACTCCACGTTCTTCAGC CACACTCCACGTTCTTCAGCT ACACTCCACGTTCTTCAGCTT CACTCCACGTTCTTCAGCTTT ACTCCACGTTCTTCAGCTTTA CTCCACGTTCTTCAGCTTTAG TCCACGTTCTTCAGCTTTAGG CCACGTTCTTCAGCTTTAGGA CACGTTCTTCAGCTTTAGGAC ACGTTCTTCAGCTTTAGGACT CGTTCTTCAGCTTTAGGACTT GTTCTTCAGCTTTAGGACTTG TTCTTCAGCTTTAGGACTTGG TCTTCAGCTTTAGGACTTGGA 
TTTAATAAATGTTGATTGTCC TTAATAAATGTTGATTGTCCT TAATAAATGTTGATTGTCCTA AATAAATGTTGATTGTCCTAT ATAAATGTTGATTGTCCTATT TAAATGTTGATTGTCCTATTT AAATGTTGATTGTCCTATTTA AATGTTGATTGTCCTATTTAA ATGTTGATTGTCCTATTTAAT TGTTGATTGTCCTATTTAATT GTTGATTGTCCTATTTAATTA TTGATTGTCCTATTTAATTAT TGATTGTCCTATTTAATTATT GATTGTCCTATTTAATTATTC ATTGTCCTATTTAATTATTCT TTGTCCTATTTAATTATTCTC TGTCCTATTTAATTATTCTCA GTCCTATTTAATTATTCTCAA TCCTATTTAATTATTCTCAAC CCTATTTAATTATTCTCAACT CTATTTAATTATTCTCAACTT TATTTAATTATTCTCAACTTT ATTTAATTATTCTCAACTTTC TTTAATTATTCTCAACTTTCC TTAATTATTCTCAACTTTCCG TAATTATTCTCAACTTTCCGA AATTATTCTCAACTTTCCGAT ATTATTCTCAACTTTCCGATT TTATTCTCAACTTTCCGATTT TATTCTCAACTTTCCGATTTT ATTCTCAACTTTCCGATTTTA TTCTCAACTTTCCGATTTTAT TCTCAACTTTCCGATTTTATT CTCAACTTTCCGATTTTATTT TCAACTTTCCGATTTTATTTC CAACTTTCCGATTTTATTTCC AACTTTCCGATTTTATTTCCC ACTTTCCGATTTTATTTCCCA CTTTCCGATTTTATTTCCCAT TTTCCGATTTTATTTCCCATG TTCCGATTTTATTTCCCATGT TCCGATTTTATTTCCCATGTA CCGATTTTATTTCCCATGTAA CGATTTTATTTCCCATGTAAC GATTTTATTTCCCATGTAACA ATTTTATTTCCCATGTAACAG TTTTATTTCCCATGTAACAGT TTTATTTCCCATGTAACAGTG TTATTTCCCATGTAACAGTGT TATTTCCCATGTAACAGTGTT ATTTCCCATGTAACAGTGTTG TTTCCCATGTAACAGTGTTGT TTCCCATGTAACAGTGTTGTT TCCCATGTAACAGTGTTGTTT CCCATGTAACAGTGTTGTTTT 
TAAAATGCCCCCATCTTCCCA AAAATGCCCCCATCTTCCCAG AAATGCCCCCATCTTCCCAGA AATGCCCCCATCTTCCCAGAG ATGCCCCCATCTTCCCAGAGC TGCCCCCATCTTCCCAGAGCT GCCCCCATCTTCCCAGAGCTG CCCCCATCTTCCCAGAGCTGC CCCCATCTTCCCAGAGCTGCC CCCATCTTCCCAGAGCTGCCA CCATCTTCCCAGAGCTGCCAG CATCTTCCCAGAGCTGCCAGC ATCTTCCCAGAGCTGCCAGCC TCTTCCCAGAGCTGCCAGCCC CTTCCCAGAGCTGCCAGCCCT TTCCCAGAGCTGCCAGCCCTC TCCCAGAGCTGCCAGCCCTCA CCCAGAGCTGCCAGCCCTCAC CCAGAGCTGCCAGCCCTCACA CAGAGCTGCCAGCCCTCACAA AGAGCTGCCAGCCCTCACAAT GAGCTGCCAGCCCTCACAATG AGCTGCCAGCCCTCACAATGC GCTGCCAGCCCTCACAATGCC CTGCCAGCCCTCACAATGCCA TGCCAGCCCTCACAATGCCAA GCCAGCCCTCACAATGCCAAC CCAGCCCTCACAATGCCAACA CAGCCCTCACAATGCCAACAG AGCCCTCACAATGCCAACAGC GCCCTCACAATGCCAACAGCT CCCTCACAATGCCAACAGCTA CCTCACAATGCCAACAGCTAA CTCACAATGCCAACAGCTAAA TCACAATGCCAACAGCTAAAT CACAATGCCAACAGCTAAATG ACAATGCCAACAGCTAAATGT CAATGCCAACAGCTAAATGTA AATGCCAACAGCTAAATGTAC ATGCCAACAGCTAAATGTACC TGCCAACAGCTAAATGTACCC GCCAACAGCTAAATGTACCCA CCAACAGCTAAATGTACCCAA CAACAGCTAAATGTACCCAAG AACAGCTAAATGTACCCAAGT ACAGCTAAATGTACCCAAGTG CAGCTAAATGTACCCAAGTGT AGCTAAATGTACCCAAGTGTT GCTAAATGTACCCAAGTGTTA CTAAATGTACCCAAGTGTTAC TAAATGTACCCAAGTGTTACT AAATGTACCCAAGTGTTACTG AATGTACCCAAGTGTTACTGA ATGTACCCAAGTGTTACTGAA TGTACCCAAGTGTTACTGAAC 
TACAGGCACCCACCATCATGC ACAGGCACCCACCATCATGCC CAGGCACCCACCATCATGCCC AGGCACCCACCATCATGCCCA GGCACCCACCATCATGCCCAG GCACCCACCATCATGCCCAGC CACCCACCATCATGCCCAGCT ACCCACCATCATGCCCAGCTA CCCACCATCATGCCCAGCTAA CCACCATCATGCCCAGCTAAT CACCATCATGCCCAGCTAATT ACCATCATGCCCAGCTAATTT CCATCATGCCCAGCTAATTTT CATCATGCCCAGCTAATTTTT ATCATGCCCAGCTAATTTTTG TCATGCCCAGCTAATTTTTGT CATGCCCAGCTAATTTTTGTA ATGCCCAGCTAATTTTTGTAT TGCCCAGCTAATTTTTGTATT GCCCAGCTAATTTTTGTATTT CCCAGCTAATTTTTGTATTTT CCAGCTAATTTTTGTATTTTT CAGCTAATTTTTGTATTTTTG AGCTAATTTTTGTATTTTTGT GCTAATTTTTGTATTTTTGTA CTAATTTTTGTATTTTTGTAG TAATTTTTGTATTTTTGTAGA AATTTTTGTATTTTTGTAGAA ATTTTTGTATTTTTGTAGAAA TTTTTGTATTTTTGTAGAAAC TTTTGTATTTTTGTAGAAACG TTTGTATTTTTGTAGAAACGG TTGTATTTTTGTAGAAACGGG TGTATTTTTGTAGAAACGGGG GTATTTTTGTAGAAACGGGGT TATTTTTGTAGAAACGGGGTT ATTTTTGTAGAAACGGGGTTT TTTTTGTAGAAACGGGGTTTC TTTTGTAGAAACGGGGTTTCA TTTGTAGAAACGGGGTTTCAC TTGTAGAAACGGGGTTTCACC TGTAGAAACGGGGTTTCACCA GTAGAAACGGGGTTTCACCAT TAGAAACGGGGTTTCACCATG AGAAACGGGGTTTCACCATGT GAAACGGGGTTTCACCATGTT AAACGGGGTTTCACCATGTTG AACGGGGTTTCACCATGTTGG ACGGGGTTTCACCATGTTGGC CGGGGTTTCACCATGTTGGCC GGGGTTTCACCATGTTGGCCC GGGTTTCACCATGTTGGCCCA GGTTTCACCATGTTGGCCCAG GTTTCACCATGTTGGCCCAGC TTTCACCATGTTGGCCCAGCT 
GCTCACTGCAGCCTCAACCTC CTCACTGCAGCCTCAACCTCC TCACTGCAGCCTCAACCTCCT CACTGCAGCCTCAACCTCCTG ACTGCAGCCTCAACCTCCTGG CTGCAGCCTCAACCTCCTGGG TGCAGCCTCAACCTCCTGGGC GCAGCCTCAACCTCCTGGGCC CAGCCTCAACCTCCTGGGCCC AGCCTCAACCTCCTGGGCCCA GCCTCAACCTCCTGGGCCCAA CCTCAACCTCCTGGGCCCAAG CTCAACCTCCTGGGCCCAAGT TCAACCTCCTGGGCCCAAGTG CAACCTCCTGGGCCCAAGTGA AACCTCCTGGGCCCAAGTGAT ACCTCCTGGGCCCAAGTGATT CCTCCTGGGCCCAAGTGATTT CTCCTGGGCCCAAGTGATTTC TCCTGGGCCCAAGTGATTTCA CCTGGGCCCAAGTGATTTCAT CTGGGCCCAAGTGATTTCATC TGGGCCCAAGTGATTTCATCT GGGCCCAAGTGATTTCATCTT GGCCCAAGTGATTTCATCTTA GCCCAAGTGATTTCATCTTAT CCCAAGTGATTTCATCTTATT CCAAGTGATTTCATCTTATTT CAAGTGATTTCATCTTATTTT AAGTGATTTCATCTTATTTTT AGTGATTTCATCTTATTTTTG GTGATTTCATCTTATTTTTGG TGATTTCATCTTATTTTTGGA GATTTCATCTTATTTTTGGAA ATTTCATCTTATTTTTGGAAA TTTCATCTTATTTTTGGAAAA TTCATCTTATTTTTGGAAAAA TCATCTTATTTTTGGAAAAAA CATCTTATTTTTGGAAAAAAA ATCTTATTTTTGGAAAAAAAA TCTTATTTTTGGAAAAAAAAA CTTATTTTTGGAAAAAAAAAC TTATTTTTGGAAAAAAAAACA TATTTTTGGAAAAAAAAACAA ATTTTTGGAAAAAAAAACAAA TTTTTGGAAAAAAAAACAAAC TTTTGGAAAAAAAAACAAACT TTTGGAAAAAAAAACAAACTA TTGGAAAAAAAAACAAACTAA TGGAAAAAAAAACAAACTAAA GGAAAAAAAAACAAACTAAAC GAAAAAAAAACAAACTAAACC AAAAAAAAACAAACTAAACCA AAAAAAAACAAACTAAACCAA AAAAAAACAAACTAAACCAAA 
TTTCTTTTTCTTTCTTTCTTT TTCTTTTTCTTTCTTTCTTTC TCTTTTTCTTTCTTTCTTTCT CTTTTTCTTTCTTTCTTTCTT TTTTTCTTTCTTTCTTTCTTT TTTTCTTTCTTTCTTTCTTTC TTTCTTTCTTTCTTTCTTTCT TTCTTTCTTTCTTTCTTTCTT TCTTTCTTTCTTTCTTTCTTT CTTTCTTTCTTTCTTTCTTTC TTTCTTTCTTTCTTTCTTTCT TTCTTTCTTTCTTTCTTTCTT TCTTTCTTTCTTTCTTTCTTT CTTTCTTTCTTTCTTTCTTTT TTTCTTTCTTTCTTTCTTTTT TTCTTTCTTTCTTTCTTTTTC TCTTTCTTTCTTTCTTTTTCT CTTTCTTTCTTTCTTTTTCTT TTTCTTTCTTTCTTTTTCTTT TTCTTTCTTTCTTTTTCTTTC TCTTTCTTTCTTTTTCTTTCT CTTTCTTTCTTTTTCTTTCTT TTTCTTTCTTTTTCTTTCTTT TTCTTTCTTTTTCTTTCTTTC TCTTTCTTTTTCTTTCTTTCT CTTTCTTTTTCTTTCTTTCTT TTTCTTTTTCTTTCTTTCTTT TTCTTTTTCTTTCTTTCTTTC TCTTTTTCTTTCTTTCTTTCT CTTTTTCTTTCTTTCTTTCTC TTTTTCTTTCTTTCTTTCTCT TTTTCTTTCTTTCTTTCTCTT TTTCTTTCTTTCTTTCTCTTT TTCTTTCTTTCTTTCTCTTTC TCTTTCTTTCTTTCTCTTTCT CTTTCTTTCTTTCTCTTTCTT TTTCTTTCTTTCTCTTTCTTT TTCTTTCTTTCTCTTTCTTTC TCTTTCTTTCTCTTTCTTTCT CTTTCTTTCTCTTTCTTTCTT TTTCTTTCTCTTTCTTTCTTT TTCTTTCTCTTTCTTTCTTTC TCTTTCTCTTTCTTTCTTTCT CTTTCTCTTTCTTTCTTTCTT TTTCTCTTTCTTTCTTTCTTT TTCTCTTTCTTTCTTTCTTTC TCTCTTTCTTTCTTTCTTTCT CTCTTTCTTTCTTTCTTTCTT TCTTTCTTTCTTTCTTTCTTT CTTTCTTTCTTTCTTTCTTTC TTTCTTTCTTTCTTTCTTTCT TTCTTTCTTTCTTTCTTTCTT TCTTTCTTTCTTTCTTTCTTT CTTTCTTTCTTTCTTTCTTTC TTTCTTTCTTTCTTTCTTTCT 
TCATGTAGCATGCATATGGCT CATGTAGCATGCATATGGCTA ATGTAGCATGCATATGGCTAA TGTAGCATGCATATGGCTAAC GTAGCATGCATATGGCTAACG TAGCATGCATATGGCTAACGG AGCATGCATATGGCTAACGGC GCATGCATATGGCTAACGGCA CATGCATATGGCTAACGGCAA ATGCATATGGCTAACGGCAAA TGCATATGGCTAACGGCAAAG GCATATGGCTAACGGCAAAGT CATATGGCTAACGGCAAAGTG ATATGGCTAACGGCAAAGTGA TATGGCTAACGGCAAAGTGAG ATGGCTAACGGCAAAGTGAGG TGGCTAACGGCAAAGTGAGGG GGCTAACGGCAAAGTGAGGGA GCTAACGGCAAAGTGAGGGAG CTAACGGCAAAGTGAGGGAGG TAACGGCAAAGTGAGGGAGGA AACGGCAAAGTGAGGGAGGAA ACGGCAAAGTGAGGGAGGAAT CGGCAAAGTGAGGGAGGAATA GGCAAAGTGAGGGAGGAATAA GCAAAGTGAGGGAGGAATAAT CAAAGTGAGGGAGGAATAATT AAAGTGAGGGAGGAATAATTA AAGTGAGGGAGGAATAATTAT AGTGAGGGAGGAATAATTATA GTGAGGGAGGAATAATTATAG TGAGGGAGGAATAATTATAGT GAGGGAGGAATAATTATAGTA AGGGAGGAATAATTATAGTAA GGGAGGAATAATTATAGTAAT GGAGGAATAATTATAGTAATA GAGGAATAATTATAGTAATAA AGGAATAATTATAGTAATAAT GGAATAATTATAGTAATAATC GAATAATTATAGTAATAATCA AATAATTATAGTAATAATCAC ATAATTATAGTAATAATCACA TAATTATAGTAATAATCACAG AATTATAGTAATAATCACAGT ATTATAGTAATAATCACAGTG TTATAGTAATAATCACAGTGA TATAGTAATAATCACAGTGAT ATAGTAATAATCACAGTGATG TAGTAATAATCACAGTGATGA AGTAATAATCACAGTGATGAC GTAATAATCACAGTGATGACG TAATAATCACAGTGATGACGT AATAATCACAGTGATGACGTG ATAATCACAGTGATGACGTGG TAATCACAGTGATGACGTGGA 
GCTCAGGCCTTGCAAGAATCT CTCAGGCCTTGCAAGAATCTC TCAGGCCTTGCAAGAATCTCT CAGGCCTTGCAAGAATCTCTA AGGCCTTGCAAGAATCTCTAC GGCCTTGCAAGAATCTCTACT GCCTTGCAAGAATCTCTACTG CCTTGCAAGAATCTCTACTGC CTTGCAAGAATCTCTACTGCC TTGCAAGAATCTCTACTGCCC TGCAAGAATCTCTACTGCCCA GCAAGAATCTCTACTGCCCAA CAAGAATCTCTACTGCCCAAC AAGAATCTCTACTGCCCAACA AGAATCTCTACTGCCCAACAA GAATCTCTACTGCCCAACAAG AATCTCTACTGCCCAACAAGT ATCTCTACTGCCCAACAAGTC TCTCTACTGCCCAACAAGTCC CTCTACTGCCCAACAAGTCCC TCTACTGCCCAACAAGTCCCT CTACTGCCCAACAAGTCCCTA TACTGCCCAACAAGTCCCTAC ACTGCCCAACAAGTCCCTACA CTGCCCAACAAGTCCCTACAA TGCCCAACAAGTCCCTACAAG GCCCAACAAGTCCCTACAAGA CCCAACAAGTCCCTACAAGAT CCAACAAGTCCCTACAAGATG CAACAAGTCCCTACAAGATGG AACAAGTCCCTACAAGATGGC ACAAGTCCCTACAAGATGGCA CAAGTCCCTACAAGATGGCAT AAGTCCCTACAAGATGGCATT AGTCCCTACAAGATGGCATTT GTCCCTACAAGATGGCATTTA TCCCTACAAGATGGCATTTAA CCCTACAAGATGGCATTTAAA CCTACAAGATGGCATTTAAAA CTACAAGATGGCATTTAAAAG TACAAGATGGCATTTAAAAGC ACAAGATGGCATTTAAAAGCA CAAGATGGCATTTAAAAGCAG AAGATGGCATTTAAAAGCAGT AGATGGCATTTAAAAGCAGTC GATGGCATTTAAAAGCAGTCC ATGGCATTTAAAAGCAGTCCC TGGCATTTAAAAGCAGTCCCT GGCATTTAAAAGCAGTCCCTC GCATTTAAAAGCAGTCCCTCA CATTTAAAAGCAGTCCCTCAC ATTTAAAAGCAGTCCCTCACG TTTAAAAGCAGTCCCTCACGC TTAAAAGCAGTCCCTCACGCA TAAAAGCAGTCCCTCACGCAC 
CCTAGAGAAGCTCCCACTAGG CTAGAGAAGCTCCCACTAGGG TAGAGAAGCTCCCACTAGGGC AGAGAAGCTCCCACTAGGGCT GAGAAGCTCCCACTAGGGCTG AGAAGCTCCCACTAGGGCTGC GAAGCTCCCACTAGGGCTGCA AAGCTCCCACTAGGGCTGCAG AGCTCCCACTAGGGCTGCAGT GCTCCCACTAGGGCTGCAGTC CTCCCACTAGGGCTGCAGTCA TCCCACTAGGGCTGCAGTCAA CCCACTAGGGCTGCAGTCAAT CCACTAGGGCTGCAGTCAATT CACTAGGGCTGCAGTCAATTC ACTAGGGCTGCAGTCAATTCC CTAGGGCTGCAGTCAATTCCC TAGGGCTGCAGTCAATTCCCA AGGGCTGCAGTCAATTCCCAG GGGCTGCAGTCAATTCCCAGG GGCTGCAGTCAATTCCCAGGT GCTGCAGTCAATTCCCAGGTC CTGCAGTCAATTCCCAGGTCT TGCAGTCAATTCCCAGGTCTT GCAGTCAATTCCCAGGTCTTA CAGTCAATTCCCAGGTCTTAG AGTCAATTCCCAGGTCTTAGG GTCAATTCCCAGGTCTTAGGT TCAATTCCCAGGTCTTAGGTG CAATTCCCAGGTCTTAGGTGC AATTCCCAGGTCTTAGGTGCT ATTCCCAGGTCTTAGGTGCTG TTCCCAGGTCTTAGGTGCTGA TCCCAGGTCTTAGGTGCTGAG CCCAGGTCTTAGGTGCTGAGC CCAGGTCTTAGGTGCTGAGCA CAGGTCTTAGGTGCTGAGCAG AGGTCTTAGGTGCTGAGCAGT GGTCTTAGGTGCTGAGCAGTG GTCTTAGGTGCTGAGCAGTGG TCTTAGGTGCTGAGCAGTGGG CTTAGGTGCTGAGCAGTGGGA TTAGGTGCTGAGCAGTGGGAG TAGGTGCTGAGCAGTGGGAGG AGGTGCTGAGCAGTGGGAGGT GGTGCTGAGCAGTGGGAGGTG GTGCTGAGCAGTGGGAGGTGG TGCTGAGCAGTGGGAGGTGGT GCTGAGCAGTGGGAGGTGGTG CTGAGCAGTGGGAGGTGGTGG TGAGCAGTGGGAGGTGGTGGC GAGCAGTGGGAGGTGGTGGCC AGCAGTGGGAGGTGGTGGCCA GCAGTGGGAGGTGGTGGCCAT CAGTGGGAGGTGGTGGCCATG 
AAATAAAGTTTGGCTTTCAGT AATAAAGTTTGGCTTTCAGTT ATAAAGTTTGGCTTTCAGTTG TAAAGTTTGGCTTTCAGTTGT AAAGTTTGGCTTTCAGTTGTA AAGTTTGGCTTTCAGTTGTAA AGTTTGGCTTTCAGTTGTAAC GTTTGGCTTTCAGTTGTAACT TTTGGCTTTCAGTTGTAACTT TTGGCTTTCAGTTGTAACTTT TGGCTTTCAGTTGTAACTTTG GGCTTTCAGTTGTAACTTTGA GCTTTCAGTTGTAACTTTGAA CTTTCAGTTGTAACTTTGAAT TTTCAGTTGTAACTTTGAATA TTCAGTTGTAACTTTGAATAT TCAGTTGTAACTTTGAATATC CAGTTGTAACTTTGAATATCT AGTTGTAACTTTGAATATCTT GTTGTAACTTTGAATATCTTT TTGTAACTTTGAATATCTTTA TGTAACTTTGAATATCTTTAT GTAACTTTGAATATCTTTATC TAACTTTGAATATCTTTATCA AACTTTGAATATCTTTATCAC ACTTTGAATATCTTTATCACA CTTTGAATATCTTTATCACAG TTTGAATATCTTTATCACAGT TTGAATATCTTTATCACAGTT TGAATATCTTTATCACAGTTA GAATATCTTTATCACAGTTAT AATATCTTTATCACAGTTATT ATATCTTTATCACAGTTATTT TATCTTTATCACAGTTATTTA ATCTTTATCACAGTTATTTAA TCTTTATCACAGTTATTTAAA CTTTATCACAGTTATTTAAAG TTTATCACAGTTATTTAAAGC TTATCACAGTTATTTAAAGCC TATCACAGTTATTTAAAGCCT ATCACAGTTATTTAAAGCCTT TCACAGTTATTTAAAGCCTTT CACAGTTATTTAAAGCCTTTA ACAGTTATTTAAAGCCTTTAA CAGTTATTTAAAGCCTTTAAA AGTTATTTAAAGCCTTTAAAA GTTATTTAAAGCCTTTAAAAA TTATTTAAAGCCTTTAAAAAG TATTTAAAGCCTTTAAAAAGC ATTTAAAGCCTTTAAAAAGCT TTTAAAGCCTTTAAAAAGCTT TTAAAGCCTTTAAAAAGCTTT TAAAGCCTTTAAAAAGCTTTA AAAGCCTTTAAAAAGCTTTAA AAGCCTTTAAAAAGCTTTAAT 
TGTGTAACTAACATAATTGGC GTGTAACTAACATAATTGGCA TGTAACTAACATAATTGGCAC GTAACTAACATAATTGGCACT TAACTAACATAATTGGCACTG AACTAACATAATTGGCACTGT ACTAACATAATTGGCACTGTC CTAACATAATTGGCACTGTCC TAACATAATTGGCACTGTCCC AACATAATTGGCACTGTCCCT ACATAATTGGCACTGTCCCTG CATAATTGGCACTGTCCCTGT ATAATTGGCACTGTCCCTGTA TAATTGGCACTGTCCCTGTAA AATTGGCACTGTCCCTGTAAA ATTGGCACTGTCCCTGTAAAT TTGGCACTGTCCCTGTAAATT TGGCACTGTCCCTGTAAATTC GGCACTGTCCCTGTAAATTCA GCACTGTCCCTGTAAATTCAA CACTGTCCCTGTAAATTCAAA ACTGTCCCTGTAAATTCAAAT CTGTCCCTGTAAATTCAAATT TGTCCCTGTAAATTCAAATTG GTCCCTGTAAATTCAAATTGG TCCCTGTAAATTCAAATTGGA CCCTGTAAATTCAAATTGGAT CCTGTAAATTCAAATTGGATA CTGTAAATTCAAATTGGATAT TGTAAATTCAAATTGGATATC GTAAATTCAAATTGGATATCC TAAATTCAAATTGGATATCCT AAATTCAAATTGGATATCCTC AATTCAAATTGGATATCCTCC ATTCAAATTGGATATCCTCCC TTCAAATTGGATATCCTCCCA TCAAATTGGATATCCTCCCAA CAAATTGGATATCCTCCCAAA AAATTGGATATCCTCCCAAAT AATTGGATATCCTCCCAAATT ATTGGATATCCTCCCAAATTT TTGGATATCCTCCCAAATTTT TGGATATCCTCCCAAATTTTA GGATATCCTCCCAAATTTTAT GATATCCTCCCAAATTTTATT ATATCCTCCCAAATTTTATTT TATCCTCCCAAATTTTATTTA ATCCTCCCAAATTTTATTTAA TCCTCCCAAATTTTATTTAAG CCTCCCAAATTTTATTTAAGC CTCCCAAATTTTATTTAAGCA TCCCAAATTTTATTTAAGCAA CCCAAATTTTATTTAAGCAAT CCAAATTTTATTTAAGCAATT CAAATTTTATTTAAGCAATTG 
TTTATTTTTTGAGCATGAAAG TTATTTTTTGAGCATGAAAGT TATTTTTTGAGCATGAAAGTA ATTTTTTGAGCATGAAAGTAA TTTTTTGAGCATGAAAGTAAT TTTTTGAGCATGAAAGTAATA TTTTGAGCATGAAAGTAATAT TTTGAGCATGAAAGTAATATA TTGAGCATGAAAGTAATATAT TGAGCATGAAAGTAATATATG GAGCATGAAAGTAATATATGC AGCATGAAAGTAATATATGCT GCATGAAAGTAATATATGCTC CATGAAAGTAATATATGCTCA ATGAAAGTAATATATGCTCAG TGAAAGTAATATATGCTCAGT GAAAGTAATATATGCTCAGTG AAAGTAATATATGCTCAGTGT AAGTAATATATGCTCAGTGTA AGTAATATATGCTCAGTGTAA GTAATATATGCTCAGTGTAAA TAATATATGCTCAGTGTAAAC AATATATGCTCAGTGTAAACA ATATATGCTCAGTGTAAACAA TATATGCTCAGTGTAAACAAT ATATGCTCAGTGTAAACAATT TATGCTCAGTGTAAACAATTA ATGCTCAGTGTAAACAATTAG TGCTCAGTGTAAACAATTAGG GCTCAGTGTAAACAATTAGGT CTCAGTGTAAACAATTAGGTC TCAGTGTAAACAATTAGGTCA CAGTGTAAACAATTAGGTCAT AGTGTAAACAATTAGGTCATT GTGTAAACAATTAGGTCATTA TGTAAACAATTAGGTCATTAT GTAAACAATTAGGTCATTATA TAAACAATTAGGTCATTATAA AAACAATTAGGTCATTATAAA AACAATTAGGTCATTATAAAT ACAATTAGGTCATTATAAATA CAATTAGGTCATTATAAATAT AATTAGGTCATTATAAATATA ATTAGGTCATTATAAATATAT TTAGGTCATTATAAATATATT TAGGTCATTATAAATATATTT AGGTCATTATAAATATATTTA GGTCATTATAAATATATTTAA GTCATTATAAATATATTTAAC TCATTATAAATATATTTAACA CATTATAAATATATTTAACAG ATTATAAATATATTTAACAGG TTATAAATATATTTAACAGGA TATAAATATATTTAACAGGAA ATAAATATATTTAACAGGAAT 
CTCAGGTGATCCACCCGCCTC TCAGGTGATCCACCCGCCTCG CAGGTGATCCACCCGCCTCGG AGGTGATCCACCCGCCTCGGC GGTGATCCACCCGCCTCGGCC GTGATCCACCCGCCTCGGCCT TGATCCACCCGCCTCGGCCTC GATCCACCCGCCTCGGCCTCC ATCCACCCGCCTCGGCCTCCC TCCACCCGCCTCGGCCTCCCA CCACCCGCCTCGGCCTCCCAA CACCCGCCTCGGCCTCCCAAA ACCCGCCTCGGCCTCCCAAAG CCCGCCTCGGCCTCCCAAAGT CCGCCTCGGCCTCCCAAAGTG CGCCTCGGCCTCCCAAAGTGC GCCTCGGCCTCCCAAAGTGCT CCTCGGCCTCCCAAAGTGCTG CTCGGCCTCCCAAAGTGCTGG TCGGCCTCCCAAAGTGCTGGG CGGCCTCCCAAAGTGCTGGGA GGCCTCCCAAAGTGCTGGGAC GCCTCCCAAAGTGCTGGGACT CCTCCCAAAGTGCTGGGACTA CTCCCAAAGTGCTGGGACTAC TCCCAAAGTGCTGGGACTACA CCCAAAGTGCTGGGACTACAG CCAAAGTGCTGGGACTACAGG CAAAGTGCTGGGACTACAGGC AAAGTGCTGGGACTACAGGCA AAGTGCTGGGACTACAGGCAT AGTGCTGGGACTACAGGCATG GTGCTGGGACTACAGGCATGA TGCTGGGACTACAGGCATGAG GCTGGGACTACAGGCATGAGG CTGGGACTACAGGCATGAGGC TGGGACTACAGGCATGAGGCA GGGACTACAGGCATGAGGCAC GGACTACAGGCATGAGGCACC GACTACAGGCATGAGGCACCG ACTACAGGCATGAGGCACCGC CTACAGGCATGAGGCACCGCG TACAGGCATGAGGCACCGCGC ACAGGCATGAGGCACCGCGCC CAGGCATGAGGCACCGCGCCT AGGCATGAGGCACCGCGCCTG GGCATGAGGCACCGCGCCTGG GCATGAGGCACCGCGCCTGGC CATGAGGCACCGCGCCTGGCC ATGAGGCACCGCGCCTGGCCA TGAGGCACCGCGCCTGGCCAG GAGGCACCGCGCCTGGCCAGG AGGCACCGCGCCTGGCCAGGA GGCACCGCGCCTGGCCAGGAC GCACCGCGCCTGGCCAGGACT 
GACAAGATAGTACTTGAGCTA ACAAGATAGTACTTGAGCTAA CAAGATAGTACTTGAGCTAAG AAGATAGTACTTGAGCTAAGC AGATAGTACTTGAGCTAAGCC GATAGTACTTGAGCTAAGCCT ATAGTACTTGAGCTAAGCCTT TAGTACTTGAGCTAAGCCTTG AGTACTTGAGCTAAGCCTTGC GTACTTGAGCTAAGCCTTGCA TACTTGAGCTAAGCCTTGCAG ACTTGAGCTAAGCCTTGCAGG CTTGAGCTAAGCCTTGCAGGT TTGAGCTAAGCCTTGCAGGTT TGAGCTAAGCCTTGCAGGTTG GAGCTAAGCCTTGCAGGTTGA AGCTAAGCCTTGCAGGTTGAG GCTAAGCCTTGCAGGTTGAGT CTAAGCCTTGCAGGTTGAGTA TAAGCCTTGCAGGTTGAGTAG AAGCCTTGCAGGTTGAGTAGG AGCCTTGCAGGTTGAGTAGGA GCCTTGCAGGTTGAGTAGGAT CCTTGCAGGTTGAGTAGGATT CTTGCAGGTTGAGTAGGATTA TTGCAGGTTGAGTAGGATTAT TGCAGGTTGAGTAGGATTATT GCAGGTTGAGTAGGATTATTC CAGGTTGAGTAGGATTATTCT AGGTTGAGTAGGATTATTCTA GGTTGAGTAGGATTATTCTAG GTTGAGTAGGATTATTCTAGT TTGAGTAGGATTATTCTAGTG TGAGTAGGATTATTCTAGTGG GAGTAGGATTATTCTAGTGGA AGTAGGATTATTCTAGTGGAA GTAGGATTATTCTAGTGGAAT TAGGATTATTCTAGTGGAATT AGGATTATTCTAGTGGAATTT GGATTATTCTAGTGGAATTTA GATTATTCTAGTGGAATTTAG ATTATTCTAGTGGAATTTAGG TTATTCTAGTGGAATTTAGGG TATTCTAGTGGAATTTAGGGA ATTCTAGTGGAATTTAGGGAA TTCTAGTGGAATTTAGGGAAA TCTAGTGGAATTTAGGGAAAC CTAGTGGAATTTAGGGAAACG TAGTGGAATTTAGGGAAACGA AGTGGAATTTAGGGAAACGAT GTGGAATTTAGGGAAACGATG TGGAATTTAGGGAAACGATGT GGAATTTAGGGAAACGATGTG GAATTTAGGGAAACGATGTGC AATTTAGGGAAACGATGTGCA 
CTACTCTCATTGACTGTTCAA TACTCTCATTGACTGTTCAAT ACTCTCATTGACTGTTCAATG CTCTCATTGACTGTTCAATGC TCTCATTGACTGTTCAATGCC CTCATTGACTGTTCAATGCCT TCATTGACTGTTCAATGCCTA CATTGACTGTTCAATGCCTAT ATTGACTGTTCAATGCCTATA TTGACTGTTCAATGCCTATAC TGACTGTTCAATGCCTATACA GACTGTTCAATGCCTATACAA ACTGTTCAATGCCTATACAAG CTGTTCAATGCCTATACAAGT TGTTCAATGCCTATACAAGTA GTTCAATGCCTATACAAGTAA TTCAATGCCTATACAAGTAAA TCAATGCCTATACAAGTAAAA CAATGCCTATACAAGTAAAAC AATGCCTATACAAGTAAAACT ATGCCTATACAAGTAAAACTT TGCCTATACAAGTAAAACTTT GCCTATACAAGTAAAACTTTA CCTATACAAGTAAAACTTTAC CTATACAAGTAAAACTTTACC TATACAAGTAAAACTTTACCA ATACAAGTAAAACTTTACCAG TACAAGTAAAACTTTACCAGC ACAAGTAAAACTTTACCAGCA CAAGTAAAACTTTACCAGCAC AAGTAAAACTTTACCAGCACC AGTAAAACTTTACCAGCACCC GTAAAACTTTACCAGCACCCA TAAAACTTTACCAGCACCCAA AAAACTTTACCAGCACCCAAG AAACTTTACCAGCACCCAAGT AACTTTACCAGCACCCAAGTC ACTTTACCAGCACCCAAGTCA CTTTACCAGCACCCAAGTCAA TTTACCAGCACCCAAGTCAAA TTACCAGCACCCAAGTCAAAA TACCAGCACCCAAGTCAAAAA ACCAGCACCCAAGTCAAAAAG CCAGCACCCAAGTCAAAAAGA CAGCACCCAAGTCAAAAAGAA AGCACCCAAGTCAAAAAGAAA GCACCCAAGTCAAAAAGAAAA CACCCAAGTCAAAAAGAAAAA ACCCAAGTCAAAAAGAAAAAA CCCAAGTCAAAAAGAAAAAAA CCAAGTCAAAAAGAAAAAAAA CAAGTCAAAAAGAAAAAAAAG AAGTCAAAAAGAAAAAAAAGG AGTCAAAAAGAAAAAAAAGGG GTCAAAAAGAAAAAAAAGGGG 
CTCATTCTCTCTCCTGCTGCA TCATTCTCTCTCCTGCTGCAC CATTCTCTCTCCTGCTGCACT ATTCTCTCTCCTGCTGCACTG TTCTCTCTCCTGCTGCACTGT TCTCTCTCCTGCTGCACTGTG CTCTCTCCTGCTGCACTGTGA TCTCTCCTGCTGCACTGTGAA CTCTCCTGCTGCACTGTGAAG TCTCCTGCTGCACTGTGAAGA CTCCTGCTGCACTGTGAAGAG TCCTGCTGCACTGTGAAGAGG CCTGCTGCACTGTGAAGAGGT CTGCTGCACTGTGAAGAGGTG TGCTGCACTGTGAAGAGGTGC GCTGCACTGTGAAGAGGTGCC CTGCACTGTGAAGAGGTGCCT TGCACTGTGAAGAGGTGCCTG GCACTGTGAAGAGGTGCCTGT CACTGTGAAGAGGTGCCTGTT ACTGTGAAGAGGTGCCTGTTG CTGTGAAGAGGTGCCTGTTGC TGTGAAGAGGTGCCTGTTGCC GTGAAGAGGTGCCTGTTGCCA TGAAGAGGTGCCTGTTGCCAA GAAGAGGTGCCTGTTGCCAAG AAGAGGTGCCTGTTGCCAAGA AGAGGTGCCTGTTGCCAAGAG GAGGTGCCTGTTGCCAAGAGT AGGTGCCTGTTGCCAAGAGTA GGTGCCTGTTGCCAAGAGTAT GTGCCTGTTGCCAAGAGTATA TGCCTGTTGCCAAGAGTATAA GCCTGTTGCCAAGAGTATAAG CCTGTTGCCAAGAGTATAAGT CTGTTGCCAAGAGTATAAGTT TGTTGCCAAGAGTATAAGTTT GTTGCCAAGAGTATAAGTTTC TTGCCAAGAGTATAAGTTTCC TGCCAAGAGTATAAGTTTCCT GCCAAGAGTATAAGTTTCCTG CCAAGAGTATAAGTTTCCTGA CAAGAGTATAAGTTTCCTGAG AAGAGTATAAGTTTCCTGAGG AGAGTATAAGTTTCCTGAGGC GAGTATAAGTTTCCTGAGGCC AGTATAAGTTTCCTGAGGCCT GTATAAGTTTCCTGAGGCCTC TATAAGTTTCCTGAGGCCTCC ATAAGTTTCCTGAGGCCTCCC TAAGTTTCCTGAGGCCTCCCA AAGTTTCCTGAGGCCTCCCAG AGTTTCCTGAGGCCTCCCAGG GTTTCCTGAGGCCTCCCAGGC TTTCCTGAGGCCTCCCAGGCC 
AAATTAAACAGCTCGTTTAAC AATTAAACAGCTCGTTTAACT ATTAAACAGCTCGTTTAACTG TTAAACAGCTCGTTTAACTGA TAAACAGCTCGTTTAACTGAT AAACAGCTCGTTTAACTGATA AACAGCTCGTTTAACTGATAA ACAGCTCGTTTAACTGATAAT CAGCTCGTTTAACTGATAATC AGCTCGTTTAACTGATAATCC GCTCGTTTAACTGATAATCCA CTCGTTTAACTGATAATCCAT TCGTTTAACTGATAATCCATA CGTTTAACTGATAATCCATAC GTTTAACTGATAATCCATACT TTTAACTGATAATCCATACTA TTAACTGATAATCCATACTAT TAACTGATAATCCATACTATA AACTGATAATCCATACTATAT ACTGATAATCCATACTATATT CTGATAATCCATACTATATTT TGATAATCCATACTATATTTG GATAATCCATACTATATTTGA ATAATCCATACTATATTTGAG TAATCCATACTATATTTGAGT AATCCATACTATATTTGAGTA ATCCATACTATATTTGAGTAG TCCATACTATATTTGAGTAGG CCATACTATATTTGAGTAGGG CATACTATATTTGAGTAGGGC ATACTATATTTGAGTAGGGCT TACTATATTTGAGTAGGGCTG ACTATATTTGAGTAGGGCTGT CTATATTTGAGTAGGGCTGTC TATATTTGAGTAGGGCTGTCA ATATTTGAGTAGGGCTGTCAC TATTTGAGTAGGGCTGTCACA ATTTGAGTAGGGCTGTCACAT TTTGAGTAGGGCTGTCACATG TTGAGTAGGGCTGTCACATGG TGAGTAGGGCTGTCACATGGT GAGTAGGGCTGTCACATGGTT AGTAGGGCTGTCACATGGTTG GTAGGGCTGTCACATGGTTGG TAGGGCTGTCACATGGTTGGA AGGGCTGTCACATGGTTGGAA GGGCTGTCACATGGTTGGAAC GGCTGTCACATGGTTGGAACC GCTGTCACATGGTTGGAACCT CTGTCACATGGTTGGAACCTC TGTCACATGGTTGGAACCTCC GTCACATGGTTGGAACCTCCG TCACATGGTTGGAACCTCCGG CACATGGTTGGAACCTCCGGT ACATGGTTGGAACCTCCGGTT 
AGACTGGGTCTCACTATGTTG GACTGGGTCTCACTATGTTGC ACTGGGTCTCACTATGTTGCC CTGGGTCTCACTATGTTGCCT TGGGTCTCACTATGTTGCCTA GGGTCTCACTATGTTGCCTAG GGTCTCACTATGTTGCCTAGG GTCTCACTATGTTGCCTAGGC TCTCACTATGTTGCCTAGGCT CTCACTATGTTGCCTAGGCTG TCACTATGTTGCCTAGGCTGG CACTATGTTGCCTAGGCTGGT ACTATGTTGCCTAGGCTGGTC CTATGTTGCCTAGGCTGGTCT TATGTTGCCTAGGCTGGTCTC ATGTTGCCTAGGCTGGTCTCA TGTTGCCTAGGCTGGTCTCAA GTTGCCTAGGCTGGTCTCAAA TTGCCTAGGCTGGTCTCAAAC TGCCTAGGCTGGTCTCAAACT GCCTAGGCTGGTCTCAAACTC CCTAGGCTGGTCTCAAACTCC CTAGGCTGGTCTCAAACTCCT TAGGCTGGTCTCAAACTCCTG AGGCTGGTCTCAAACTCCTGG GGCTGGTCTCAAACTCCTGGG GCTGGTCTCAAACTCCTGGGC CTGGTCTCAAACTCCTGGGCT TGGTCTCAAACTCCTGGGCTC GGTCTCAAACTCCTGGGCTCA GTCTCAAACTCCTGGGCTCAA TCTCAAACTCCTGGGCTCAAG CTCAAACTCCTGGGCTCAAGT TCAAACTCCTGGGCTCAAGTG CAAACTCCTGGGCTCAAGTGA AAACTCCTGGGCTCAAGTGAT AACTCCTGGGCTCAAGTGATC ACTCCTGGGCTCAAGTGATCC CTCCTGGGCTCAAGTGATCCA TCCTGGGCTCAAGTGATCCAT CCTGGGCTCAAGTGATCCATC CTGGGCTCAAGTGATCCATCT TGGGCTCAAGTGATCCATCTC GGGCTCAAGTGATCCATCTCT GGCTCAAGTGATCCATCTCTG GCTCAAGTGATCCATCTCTGC CTCAAGTGATCCATCTCTGCC TCAAGTGATCCATCTCTGCCT CAAGTGATCCATCTCTGCCTT AAGTGATCCATCTCTGCCTTC AGTGATCCATCTCTGCCTTCC GTGATCCATCTCTGCCTTCCA TGATCCATCTCTGCCTTCCAA GATCCATCTCTGCCTTCCAAA ATCCATCTCTGCCTTCCAAAG 


sliding函数:sliding(21)用长度为21、步长为1的窗口在序列上滑动,依次返回每个窗口内的子串(即21-mer);当序列长度L不小于21时,共得到L-21+1个子串。



分析: k-mer.count=Sequence.length-k+1=75-21+1=55

Sequence:长度为75,共1条
GTATAAGAGCAGCCTTATTCCTATTTATAATCAGGGTGAAACACCTGTGCCAATGCCAAGACAGGGGTGCCAAGA
K-mer:长度为21,共55个
GTATAAGAGCAGCCTTATTCC TATAAGAGCAGCCTTATTCCT ATAAGAGCAGCCTTATTCCTA TAAGAGCAGCCTTATTCCTAT AAGAGCAGCCTTATTCCTATT AGAGCAGCCTTATTCCTATTT GAGCAGCCTTATTCCTATTTA AGCAGCCTTATTCCTATTTAT GCAGCCTTATTCCTATTTATA CAGCCTTATTCCTATTTATAA AGCCTTATTCCTATTTATAAT GCCTTATTCCTATTTATAATC CCTTATTCCTATTTATAATCA CTTATTCCTATTTATAATCAG TTATTCCTATTTATAATCAGG TATTCCTATTTATAATCAGGG ATTCCTATTTATAATCAGGGT TTCCTATTTATAATCAGGGTG TCCTATTTATAATCAGGGTGA CCTATTTATAATCAGGGTGAA CTATTTATAATCAGGGTGAAA TATTTATAATCAGGGTGAAAC ATTTATAATCAGGGTGAAACA TTTATAATCAGGGTGAAACAC TTATAATCAGGGTGAAACACC TATAATCAGGGTGAAACACCT ATAATCAGGGTGAAACACCTG TAATCAGGGTGAAACACCTGT AATCAGGGTGAAACACCTGTG ATCAGGGTGAAACACCTGTGC TCAGGGTGAAACACCTGTGCC CAGGGTGAAACACCTGTGCCA AGGGTGAAACACCTGTGCCAA GGGTGAAACACCTGTGCCAAT GGTGAAACACCTGTGCCAATG GTGAAACACCTGTGCCAATGC TGAAACACCTGTGCCAATGCC GAAACACCTGTGCCAATGCCA AAACACCTGTGCCAATGCCAA AACACCTGTGCCAATGCCAAG ACACCTGTGCCAATGCCAAGA CACCTGTGCCAATGCCAAGAC ACCTGTGCCAATGCCAAGACA CCTGTGCCAATGCCAAGACAG CTGTGCCAATGCCAAGACAGG TGTGCCAATGCCAAGACAGGG GTGCCAATGCCAAGACAGGGG TGCCAATGCCAAGACAGGGGT GCCAATGCCAAGACAGGGGTG CCAATGCCAAGACAGGGGTGC CAATGCCAAGACAGGGGTGCC AATGCCAAGACAGGGGTGCCA ATGCCAAGACAGGGGTGCCAA TGCCAAGACAGGGGTGCCAAG GCCAAGACAGGGGTGCCAAGA

k-mer.count=Sequence.length-k+1=75-21+1=55



4.其他部分:比较好理解。先对3中sliding得到的每个k-mer做map,生成(k-mer, 1)键值对,每个计数为1;然后用reduceByKey按k-mer把计数累加;最后进行排序:先用map(_.swap)将k-v交换成(计数, k-mer),再用sortByKey(ascending = false)按计数从大到小排序。

val kmers =reads.flatMap(_.getSequence.sliding(21).map(k => (k, 1L))).reduceByKey(_ + _).map(_.swap).sortByKey(ascending = false)

kmers.take(10).foreach(println)
打印和保存:
<pre name="code" class="plain" style="font-size: 13.3333px;">

 
<pre name="code" class="plain" style="font-size: 13.3333px;">scala> kmers.foreach(println)
scala> kmers.saveAsTextFile("hdfs://Master:9000/xubo/adam/output/smallkmers.adam")
 
(4,TCTTTCTTTCTTTCTTTCTTT)
(4,TTTCTTTCTTTCTTTCTTTCT)
(3,CTTTCTTTCTTTCTTTCTTTC)
(3,TTCTTTCTTTCTTTCTTTCTT)
(2,TCTTTTTCTTTCTTTCTTTCT)
(2,TTCTTTTTCTTTCTTTCTTTC)
(2,TTTCTTTTTCTTTCTTTCTTT)
(1,ATTGGATATCCTCCCAAATTT)
(1,AGGCATGAGGCACCGCGCCTG)
(1,CTACTGCCCAACAAGTCCCTA)
(1,TGGAATTTAGGGAAACGATGT)
(1,GGGCTGCAGTCAATTCCCAGG)
(1,ATGTAGCATGCATATGGCTAA)
(1,TGTAGCATGCATATGGCTAAC)
(1,CTCTCCTGCTGCACTGTGAAG)
(1,GTTCTTCAGCTTTAGGACTTG)
(1,TTCTTTCTCTTTCTTTCTTTC)
(1,AACTGATAATCCATACTATAT)
(1,GAAGCTCCCACTAGGGCTGCA)
(1,GTAAACAATTAGGTCATTATA)
(1,ATTTCATCTTATTTTTGGAAA)
(1,TTTAAAAGCAGTCCCTCACGC)
(1,GTTTTGAGATGGAGTCTCGCT)
(1,TTTGTATTTTTGTAGAAACGG)
(1,GTGCTGGGACTACAGGCATGA)
(1,GCTCACTGCAGCCTCAACCTC)
(1,TGTATCTTCCTCCCCTGCTGT)
(1,AACTAACATAATTGGCACTGT)
(1,CATTCTCTCTCCTGCTGCACT)
(1,ATTATAGTAATAATCACAGTG)
(1,ACCCAAGTCAAAAAGAAAAAA)
(1,TTTTGTATTTTTGTAGAAACG)
(1,CAGGCCTTGCAAGAATCTCTA)
(1,ACTTGAGCTAAGCCTTGCAGG)
(1,TGCCCAGCTAATTTTTGTATT)
(1,TGATTTCATCTTATTTTTGGA)
(1,TTAAAAGCAGTCCCTCACGCA)
(1,CCACGTTCTTCAGCTTTAGGA)
(1,AGGGCTGCAGTCAATTCCCAG)
(1,TTTCTTTCTTTCTCTTTCTTT)
(1,GGGTGAAACACCTGTGCCAAT)
(1,CCATCATGCCCAGCTAATTTT)
(1,CCCTGCTGTATGTTTCCTGCC)
(1,CTGCAGCCTCAACCTCCTGGG)
(1,GCTGCCAGCCCTCACAATGCC)
(1,GTTTGTTTGTTTTGAGATGGA)
(1,ATTAGGTCATTATAAATATAT)
(1,TTCAATGCCTATACAAGTAAA)
(1,CAAACATCACACTCCACGTTC)
(1,TATTTTTAAGGTTTTTTTTGT)
(1,TAGTACTTGAGCTAAGCCTTG)
(1,CAATGCCAACAGCTAAATGTA)
(1,TCCTCCCAAATTTTATTTAAG)
(1,CTTTCTTTCTTTCTTTTTCTT)
(1,AATGTTGATTGTCCTATTTAA)
(1,AAGAGTATAAGTTTCCTGAGG)
(1,GCAAGAATCTCTACTGCCCAA)
(1,TTCAAATTGGATATCCTCCCA)
(1,CTCCCAAATTTTATTTAAGCA)
(1,CCAGCACCCAAGTCAAAAAGA)
(1,TAGGTGCTGAGCAGTGGGAGG)
(1,TGAGCAGTGGGAGGTGGTGGC)
(1,TGTTGATTGTCCTATTTAATT)
(1,CCTCAAACATCACACTCCACG)
(1,TTTATTTTTATTTTTAAGGTT)
(1,GGAGGAATAATTATAGTAATA)
(1,GTAGCATGCATATGGCTAACG)
(1,TGATTGTCCTATTTAATTATT)
(1,TTTTTTTTGTTTGTTTGTTTT)
(1,AAATAAAGTTTGGCTTTCAGT)
(1,CAATTAGGTCATTATAAATAT)
(1,CTCCCAAAGTGCTGGGACTAC)
(1,GAGATGGAGTCTCGCTCCACC)
(1,TCGGCCTCCCAAAGTGCTGGG)
(1,GGCATTTAAAAGCAGTCCCTC)
(1,ATGTACCCAAGTGTTACTGAA)
(1,GTATGTTTCCTGCCCTCAAAC)
(1,TTATTCTCAACTTTCCGATTT)
(1,GCAGGTTGAGTAGGATTATTC)
(1,TGGGTCTCACTATGTTGCCTA)
(1,TTTATAATCAGGGTGAAACAC)
(1,CCCACCATCATGCCCAGCTAA)
(1,TCCCAGGTCTTAGGTGCTGAG)
(1,GCACCCAAGTCAAAAAGAAAA)
(1,GTGCCTGTTGCCAAGAGTATA)
(1,CCTTGCAGGTTGAGTAGGATT)
(1,GTATAAGTTTCCTGAGGCCTC)
(1,TGTACCCAAGTGTTACTGAAC)
(1,CTCCCACTAGGGCTGCAGTCA)
(1,CACCCAAGTCAAAAAGAAAAA)
(1,ATTCCTATTTATAATCAGGGT)
(1,GCAGCCTTATTCCTATTTATA)
(1,TCCTCCCCTGCTGTATGTTTC)
(1,ATTTCCCATGTAACAGTGTTG)
(1,TCTCGCTCCACCGCCCAGACT)
(1,AGGAATAATTATAGTAATAAT)
(1,TCCCCTGCTGTATGTTTCCTG)
(1,GCCTTATTCCTATTTATAATC)
(1,ATATATGCTCAGTGTAAACAA)
(1,GAGGAATAATTATAGTAATAA)
(1,TTTACCAGCACCCAAGTCAAA)
(1,GCCCAAGTGATTTCATCTTAT)
(1,CACTCCACGTTCTTCAGCTTT)
(1,TCCCATGTAACAGTGTTGTTT)
(1,AATTGGCACTGTCCCTGTAAA)
(1,TGGCATTTAAAAGCAGTCCCT)
(1,TACAGGCATGAGGCACCGCGC)
(1,GACTGGGTCTCACTATGTTGC)
(1,ACAGTTATTTAAAGCCTTTAA)
(1,AAAACTTTACCAGCACCCAAG)
(1,GTTATTTAAAGCCTTTAAAAA)
(1,ATCTTCCTCCCCTGCTGTATG)
(1,TCTCAAACTCCTGGGCTCAAG)
(1,TTAATTATTCTCAACTTTCCG)
(1,CCAAATTTTATTTAAGCAATT)
(1,CCCACTAGGGCTGCAGTCAAT)
(1,AACTTTCCGATTTTATTTCCC)
(1,GATTATTCTAGTGGAATTTAG)
(1,TGAGGGAGGAATAATTATAGT)
(1,GAGAAGCTCCCACTAGGGCTG)
(1,CCATCTTCCCAGAGCTGCCAG)
(1,CTCAAGTGATCCATCTCTGCC)
(1,ACACCTGTGCCAATGCCAAGA)
(1,TCCTGCCCTCAAACATCACAC)
(1,TGCAGTCAATTCCCAGGTCTT)
(1,TATATGCTCAGTGTAAACAAT)
(1,ATATCCTCCCAAATTTTATTT)
(1,GTCAAAAAGAAAAAAAAGGGG)
(1,CTGTATGTTTCCTGCCCTCAA)
(1,TGCCAGCCCTCACAATGCCAA)
(1,CCTGTAAATTCAAATTGGATA)
(1,ATTTGAGTAGGGCTGTCACAT)
(1,GGCCTTGCAAGAATCTCTACT)
(1,TCACTGCAGCCTCAACCTCCT)
(1,AAATGCCCCCATCTTCCCAGA)
(1,AAGCTCCCACTAGGGCTGCAG)
(1,TATAATCAGGGTGAAACACCT)
(1,CTTATTCCTATTTATAATCAG)
(1,AACAAGTCCCTACAAGATGGC)
(1,AAACAGCTCGTTTAACTGATA)
(1,CCCGCCTCGGCCTCCCAAAGT)
(1,CTCACAATGCCAACAGCTAAA)
(1,GTAGAAACGGGGTTTCACCAT)
(1,TCTCATTGACTGTTCAATGCC)
(1,GACAAGATAGTACTTGAGCTA)
(1,TTGTCCTATTTAATTATTCTC)
(1,CAAGTCAAAAAGAAAAAAAAG)
(1,AAACTCCTGGGCTCAAGTGAT)
(1,TCCACCCGCCTCGGCCTCCCA)
(1,CTCATTGACTGTTCAATGCCT)
(1,TTTTATTTTTAAGGTTTTTTT)
(1,AAACTTTACCAGCACCCAAGT)
(1,TGAGATGGAGTCTCGCTCCAC)
(1,TCAATTCCCAGGTCTTAGGTG)
(1,ATAAAGTTTGGCTTTCAGTTG)
(1,TACAGGCACCCACCATCATGC)
(1,TCCTATTTATAATCAGGGTGA)
(1,GAGCAGCCTTATTCCTATTTA)
(1,GGGAGGAATAATTATAGTAAT)
(1,AGATGGAGTCTCGCTCCACCG)
(1,AATAATTATAGTAATAATCAC)
(1,CTGGGCCCAAGTGATTTCATC)
(1,ATGCCCCCATCTTCCCAGAGC)
(1,TGTTTGTTTTGAGATGGAGTC)
(1,ACTACAGGCATGAGGCACCGC)
(1,TGTTTTGAGATGGAGTCTCGC)
(1,TTAAGGTTTTTTTTGTTTGTT)
(1,ATCTCTACTGCCCAACAAGTC)
(1,TCTTTCTTTCTTTCTTTTTCT)
(1,TCCACGTTCTTCAGCTTTAGG)
(1,GCCTCGGCCTCCCAAAGTGCT)
(1,CAGGCATGAGGCACCGCGCCT)
(1,GCTAATTTTTGTATTTTTGTA)
(1,GTAACTAACATAATTGGCACT)
(1,CCGATTTTATTTCCCATGTAA)
(1,TGAGCATGAAAGTAATATATG)
(1,GTGGAATTTAGGGAAACGATG)
(1,AAGTCCCTACAAGATGGCATT)
(1,ATAGTAATAATCACAGTGATG)
(1,CTCATTCTCTCTCCTGCTGCA)
(1,AAGATGGCATTTAAAAGCAGT)
(1,ATCTTTATCACAGTTATTTAA)
(1,TCCCAGAGCTGCCAGCCCTCA)
(1,GGATTATTCTAGTGGAATTTA)
(1,AACTTTGAATATCTTTATCAC)
(1,AACGGCAAAGTGAGGGAGGAA)
(1,TTGACTGTTCAATGCCTATAC)
(1,CAAAGTGCTGGGACTACAGGC)
(1,AGACTGGGTCTCACTATGTTG)
(1,AGAGCTGCCAGCCCTCACAAT)
(1,TTTCCTGCCCTCAAACATCAC)
(1,CCAGGTCTTAGGTGCTGAGCA)
(1,GGCCTCCCAAAGTGCTGGGAC)
(1,TAATTATTCTCAACTTTCCGA)
(1,CTGTAAATTCAAATTGGATAT)
(1,AGAAGCTCCCACTAGGGCTGC)
(1,GTCCCTACAAGATGGCATTTA)
(1,TAAAGCCTTTAAAAAGCTTTA)
(1,GTTGATTGTCCTATTTAATTA)
(1,ATTGTCCTATTTAATTATTCT)
(1,GCAAAGTGAGGGAGGAATAAT)
(1,GGTCTTAGGTGCTGAGCAGTG)
(1,CTTGCAGGTTGAGTAGGATTA)
(1,CCCTACAAGATGGCATTTAAA)
(1,ATAATCACAGTGATGACGTGG)
(1,ACTGTGAAGAGGTGCCTGTTG)
(1,ATGTTGATTGTCCTATTTAAT)
(1,TTTATTTTTTGAGCATGAAAG)
(1,ATGCCTATACAAGTAAAACTT)
(1,CCCCTGCTGTATGTTTCCTGC)
(1,TCTTTCTTTCTTTCTCTTTCT)
(1,AAAGTGCTGGGACTACAGGCA)
(1,TATAGTAATAATCACAGTGAT)
(1,CTAATTTTTGTATTTTTGTAG)
(1,TCTTATTTTTGGAAAAAAAAA)
(1,TAACATAATTGGCACTGTCCC)
(1,GAGTATAAGTTTCCTGAGGCC)
(1,GCCAACAGCTAAATGTACCCA)
(1,ACGGCAAAGTGAGGGAGGAAT)
(1,GAATAATTATAGTAATAATCA)
(1,TCTCTCCTGCTGCACTGTGAA)
(1,AAATGTACCCAAGTGTTACTG)
(1,TTTCCTGAGGCCTCCCAGGCC)
(1,TTCATCTTATTTTTGGAAAAA)
(1,AGTACTTGAGCTAAGCCTTGC)
(1,AATGCCTATACAAGTAAAACT)
(1,TGGCACTGTCCCTGTAAATTC)
(1,TGTAGAAACGGGGTTTCACCA)
(1,CCTAGAGAAGCTCCCACTAGG)
(1,CTCAGTGTAAACAATTAGGTC)
(1,ACCTCCTGGGCCCAAGTGATT)
(1,TTCCCAGAGCTGCCAGCCCTC)
(1,ATGGCTAACGGCAAAGTGAGG)
(1,ACCATCATGCCCAGCTAATTT)
(1,GTAATAATCACAGTGATGACG)
(1,TCCCACTAGGGCTGCAGTCAA)
(1,CACGTTCTTCAGCTTTAGGAC)
(1,ATGCCAAGACAGGGGTGCCAA)
(1,AGTGTAAACAATTAGGTCATT)
(1,TTTTTGGAAAAAAAAACAAAC)
(1,AAGTGAGGGAGGAATAATTAT)
(1,CAGTGTAAACAATTAGGTCAT)
(1,GCAGCCTCAACCTCCTGGGCC)
(1,CTTTTTCTTTCTTTCTTTCTC)
(1,AGGCACCGCGCCTGGCCAGGA)
(1,TTTTGTAGAAACGGGGTTTCA)
(1,ACATCACACTCCACGTTCTTC)
(1,TGTGTAACTAACATAATTGGC)
(1,GCCTGTTGCCAAGAGTATAAG)
(1,TGGCTAACGGCAAAGTGAGGG)
(1,TTTAAGGTTTTTTTTGTTTGT)
(1,CTGAGCAGTGGGAGGTGGTGG)
(1,CCCTCACAATGCCAACAGCTA)
(1,TAATCCATACTATATTTGAGT)
(1,GTGAGGGAGGAATAATTATAG)
(1,GTTTAACTGATAATCCATACT)
(1,GGCTCAAGTGATCCATCTCTG)
(1,TATCCTCCCAAATTTTATTTA)
(1,TTTCTTTCTTTCTTTTTCTTT)
(1,TCGCTCCACCGCCCAGACTGG)
(1,TGTTTCCTGCCCTCAAACATC)
(1,GCCTCCCAAAGTGCTGGGACT)
(1,ACTGTTCAATGCCTATACAAG)
(1,TTGATTGTCCTATTTAATTAT)
(1,TTGGCTTTCAGTTGTAACTTT)
(1,CCAGAGCTGCCAGCCCTCACA)
(1,TTATAAATATATTTAACAGGA)
(1,TAAACAATTAGGTCATTATAA)
(1,GGAGTCTCGCTCCACCGCCCA)
(1,CTGCCCTCAAACATCACACTC)
(1,TATGTTTCCTGCCCTCAAACA)
(1,TATTTTTATTTTTAAGGTTTT)
(1,GCCCAGCTAATTTTTGTATTT)
(1,GATGGAGTCTCGCTCCACCGC)
(1,TGTGCCAATGCCAAGACAGGG)
(1,TCTCTACTGCCCAACAAGTCC)
(1,GTTTCCTGCCCTCAAACATCA)
(1,CTGTTGCCAAGAGTATAAGTT)
(1,TGTGAAGAGGTGCCTGTTGCC)
(1,CAGTCAATTCCCAGGTCTTAG)
(1,AAGATAGTACTTGAGCTAAGC)
(1,AGGGAGGAATAATTATAGTAA)
(1,ATTATAAATATATTTAACAGG)
(1,CTAGAGAAGCTCCCACTAGGG)
(1,CAGCTCGTTTAACTGATAATC)
(1,AAGGTTTTTTTTGTTTGTTTG)
(1,TCAGGGTGAAACACCTGTGCC)
(1,AGATGGCATTTAAAAGCAGTC)
(1,CTACAAGATGGCATTTAAAAG)
(1,CCTGCTGCACTGTGAAGAGGT)
(1,CTGGGCTCAAGTGATCCATCT)
(1,CATGAAAGTAATATATGCTCA)
(1,CCTAGGCTGGTCTCAAACTCC)
(1,GCATGCATATGGCTAACGGCA)
(1,GGATATCCTCCCAAATTTTAT)
(1,AGTCTCGCTCCACCGCCCAGA)
(1,CAGTTATTTAAAGCCTTTAAA)
(1,TGAAGAGGTGCCTGTTGCCAA)
(1,CTTAGGTGCTGAGCAGTGGGA)
(1,ATCCATACTATATTTGAGTAG)
(1,TATTTTTGGAAAAAAAAACAA)
(1,TTTCTTTCTCTTTCTTTCTTT)
(1,CCTGGGCCCAAGTGATTTCAT)
(1,ATTCTAGTGGAATTTAGGGAA)
(1,TCCTGGGCCCAAGTGATTTCA)
(1,CAAGTCCCTACAAGATGGCAT)
(1,CTGTCCCTGTAAATTCAAATT)
(1,TGATCCATCTCTGCCTTCCAA)
(1,TGTTGCCTAGGCTGGTCTCAA)
(1,TGCTGTATGTTTCCTGCCCTC)
(1,CATGTAGCATGCATATGGCTA)
(1,TAGGTCATTATAAATATATTT)
(1,GTTGAGTAGGATTATTCTAGT)
(1,GGGCCCAAGTGATTTCATCTT)
(1,ACTCCTGGGCTCAAGTGATCC)
(1,CCTGTGCCAATGCCAAGACAG)
(1,TCTTTCTTTTTCTTTCTTTCT)
(1,TTTGAGTAGGGCTGTCACATG)
(1,CTGGTCTCAAACTCCTGGGCT)
(1,TAAAACTTTACCAGCACCCAA)
(1,GTAATATATGCTCAGTGTAAA)
(1,TCAGGTGATCCACCCGCCTCG)
(1,ACATAATTGGCACTGTCCCTG)
(1,TAGTGGAATTTAGGGAAACGA)
(1,CTGCTGTATGTTTCCTGCCCT)
(1,TCAACTTTCCGATTTTATTTC)
(1,AATTATTCTCAACTTTCCGAT)
(1,GCTGGTCTCAAACTCCTGGGC)
(1,GTAACTTTGAATATCTTTATC)
(1,TGAAAGTAATATATGCTCAGT)
(1,GGTCTCACTATGTTGCCTAGG)
(1,AACAGCTAAATGTACCCAAGT)
(1,CACCTGTGCCAATGCCAAGAC)
(1,CCAGCCCTCACAATGCCAACA)
(1,AGGTGCTGAGCAGTGGGAGGT)
(1,TATTCCTATTTATAATCAGGG)
(1,TTTAATTATTCTCAACTTTCC)
(1,TAACTAACATAATTGGCACTG)
(1,TTGAGATGGAGTCTCGCTCCA)
(1,TTTTCTTTCTTTCTTTCTTTC)
(1,CATGCCCAGCTAATTTTTGTA)
(1,GAGCAGTGGGAGGTGGTGGCC)
(1,CCTCGGCCTCCCAAAGTGCTG)
(1,TCTTTCTTTCTCTTTCTTTCT)
(1,TCAACCTCCTGGGCCCAAGTG)
(1,TAGGCTGGTCTCAAACTCCTG)
(1,TGCTCAGTGTAAACAATTAGG)
(1,ATTGACTGTTCAATGCCTATA)
(1,AGATAGTACTTGAGCTAAGCC)
(1,GCCAATGCCAAGACAGGGGTG)
(1,TCTTCAGCTTTAGGACTTGGA)
(1,GGTTGAGTAGGATTATTCTAG)
(1,TGCCAATGCCAAGACAGGGGT)
(1,CTAAGCCTTGCAGGTTGAGTA)
(1,TGGAGTCTCGCTCCACCGCCC)
(1,GGAATTTAGGGAAACGATGTG)
(1,GGTCTCAAACTCCTGGGCTCA)
(1,GTTGCCAAGAGTATAAGTTTC)
(1,GAGTCTCGCTCCACCGCCCAG)
(1,AAACATCACACTCCACGTTCT)
(1,CTAGTGGAATTTAGGGAAACG)
(1,AGGTGATCCACCCGCCTCGGC)
(1,TAAACAGCTCGTTTAACTGAT)
(1,TTATAATCAGGGTGAAACACC)
(1,TGCCAACAGCTAAATGTACCC)
(1,GTTTTTTTTGTTTGTTTGTTT)
(1,CCTATTTATAATCAGGGTGAA)
(1,GGCTTTCAGTTGTAACTTTGA)
(1,TTTCTTTCTTTTTCTTTCTTT)
(1,ATGTTTCCTGCCCTCAAACAT)
(1,TTATTTTTGGAAAAAAAAACA)
(1,CTTGCAAGAATCTCTACTGCC)
(1,CTCCTGGGCTCAAGTGATCCA)
(1,CTATGTTGCCTAGGCTGGTCT)
(1,GCATATGGCTAACGGCAAAGT)
(1,AGAGCAGCCTTATTCCTATTT)
(1,CTTTCTTTTTCTTTCTTTCTT)
(1,CCTTATTCCTATTTATAATCA)
(1,CCTCAACCTCCTGGGCCCAAG)
(1,TAATCAGGGTGAAACACCTGT)
(1,CCATACTATATTTGAGTAGGG)
(1,TTTATTTTTAAGGTTTTTTTT)
(1,ACTATGTTGCCTAGGCTGGTC)
(1,CAGGTTGAGTAGGATTATTCT)
(1,TAATCACAGTGATGACGTGGA)
(1,TACTGCCCAACAAGTCCCTAC)
(1,CTGGGTCTCACTATGTTGCCT)
(1,ATCACACTCCACGTTCTTCAG)
(1,GACTGTTCAATGCCTATACAA)
(1,CTTCCCAGAGCTGCCAGCCCT)
(1,CTCGCTCCACCGCCCAGACTG)
(1,TTTGTTTGTTTGTTTTGAGAT)
(1,CTCTCTCCTGCTGCACTGTGA)
(1,TATCTTTATCACAGTTATTTA)
(1,AGGTCTTAGGTGCTGAGCAGT)
(1,TTCTTTCTTTTTCTTTCTTTC)
(1,CCAAGTCAAAAAGAAAAAAAA)
(1,TAAGAGCAGCCTTATTCCTAT)
(1,CAATGCCAAGACAGGGGTGCC)
(1,GCACTGTCCCTGTAAATTCAA)
(1,TACAAGATGGCATTTAAAAGC)
(1,TAACGGCAAAGTGAGGGAGGA)
(1,GGGTCTCACTATGTTGCCTAG)
(1,AGGTGCCTGTTGCCAAGAGTA)
(1,TAAATGTACCCAAGTGTTACT)
(1,GGCTGGTCTCAAACTCCTGGG)
(1,TGCCCTCAAACATCACACTCC)
(1,TCATTGACTGTTCAATGCCTA)
(1,TCATGTAGCATGCATATGGCT)
(1,ACCTGTGCCAATGCCAAGACA)
(1,ATACAAGTAAAACTTTACCAG)
(1,TGTCACATGGTTGGAACCTCC)
(1,CTAGGCTGGTCTCAAACTCCT)
(1,AGTATAAGTTTCCTGAGGCCT)
(1,GTCTCAAACTCCTGGGCTCAA)
(1,ATTTTTGTATTTTTGTAGAAA)
(1,ATTGGCACTGTCCCTGTAAAT)
(1,TTTTCTTTCTTTCTTTCTCTT)
(1,GCCAAGACAGGGGTGCCAAGA)
(1,TGCCAAGACAGGGGTGCCAAG)
(1,CCAACAAGTCCCTACAAGATG)
(1,TTCTTTCTTTCTTTCTCTTTC)
(1,TATGTTGCCTAGGCTGGTCTC)
(1,TCATTATAAATATATTTAACA)
(1,AGTTTCCTGAGGCCTCCCAGG)
(1,CTCAACTTTCCGATTTTATTT)
(1,GCTGCACTGTGAAGAGGTGCC)
(1,GTCTCACTATGTTGCCTAGGC)
(1,TATAAATATATTTAACAGGAA)
(1,AATTAAACAGCTCGTTTAACT)
(1,ACTTTGAATATCTTTATCACA)
(1,CCAAAGTGCTGGGACTACAGG)
(1,TAGAGAAGCTCCCACTAGGGC)
(1,CGCCTCGGCCTCCCAAAGTGC)
(1,CAGCACCCAAGTCAAAAAGAA)
(1,AATCTCTACTGCCCAACAAGT)
(1,AATAAAGTTTGGCTTTCAGTT)
(1,TTTTTGTAGAAACGGGGTTTC)
(1,TATTTGAGTAGGGCTGTCACA)
(1,GCTAAGCCTTGCAGGTTGAGT)
(1,AGAATCTCTACTGCCCAACAA)
(1,TAGAAACGGGGTTTCACCATG)
(1,TTTTTTTGTTTGTTTGTTTTG)
(1,CACACTCCACGTTCTTCAGCT)
(1,GCTGGGACTACAGGCATGAGG)
(1,TCTTTCTCTTTCTTTCTTTCT)
(1,CAGCTAATTTTTGTATTTTTG)
(1,GTGAAGAGGTGCCTGTTGCCA)
(1,TTCCCATGTAACAGTGTTGTT)
(1,AATATCTTTATCACAGTTATT)
(1,AAACAATTAGGTCATTATAAA)
(1,ATGAAAGTAATATATGCTCAG)
(1,GAGCTAAGCCTTGCAGGTTGA)
(1,TTAAAGCCTTTAAAAAGCTTT)
(1,AAAAAAAACAAACTAAACCAA)
(1,CACTAGGGCTGCAGTCAATTC)
(1,TTTTTAAGGTTTTTTTTGTTT)
(1,AATTTAGGGAAACGATGTGCA)
(1,ATAATCCATACTATATTTGAG)
(1,TGAGTAGGATTATTCTAGTGG)
(1,GGACTACAGGCATGAGGCACC)
(1,GAGGCACCGCGCCTGGCCAGG)
(1,AATGCCAACAGCTAAATGTAC)
(1,AATAAATGTTGATTGTCCTAT)
(1,CGGGGTTTCACCATGTTGGCC)
(1,TTATTTTTTGAGCATGAAAGT)
(1,TTTAAAGCCTTTAAAAAGCTT)
(1,TTTTTGAGCATGAAAGTAATA)
(1,TATTCTAGTGGAATTTAGGGA)
(1,AAGTGATCCATCTCTGCCTTC)
(1,CTCTCATTGACTGTTCAATGC)
(1,AAACACCTGTGCCAATGCCAA)
(1,GCCCTCAAACATCACACTCCA)
(1,CTCACTATGTTGCCTAGGCTG)
(1,AGGTCATTATAAATATATTTA)
(1,AGTAGGATTATTCTAGTGGAA)
(1,TTGTAACTTTGAATATCTTTA)
(1,AAGTTTGGCTTTCAGTTGTAA)
(1,CCCATGTAACAGTGTTGTTTT)
(1,TTTATTTCCCATGTAACAGTG)
(1,TGATAATCCATACTATATTTG)
(1,TGTAACTAACATAATTGGCAC)
(1,TGCCCAACAAGTCCCTACAAG)
(1,AGTGAGGGAGGAATAATTATA)
(1,ATTATTCTCAACTTTCCGATT)
(1,TTTTTATTTTTAAGGTTTTTT)
(1,CTATTTATAATCAGGGTGAAA)
(1,TGCCAAGAGTATAAGTTTCCT)
(1,TATTCTCAACTTTCCGATTTT)
(1,TATTTTTTGAGCATGAAAGTA)
(1,AGCTAAGCCTTGCAGGTTGAG)
(1,CAACAAGTCCCTACAAGATGG)
(1,CCTCCCAAAGTGCTGGGACTA)
(1,CCTGCTGTATGTTTCCTGCCC)
(1,TTTCACCATGTTGGCCCAGCT)
(1,AAGCCTTTAAAAAGCTTTAAT)
(1,AATTAGGTCATTATAAATATA)
(1,GGGTTTCACCATGTTGGCCCA)
(1,GCTCAGTGTAAACAATTAGGT)
(1,CATCTTCCCAGAGCTGCCAGC)
(1,GGTGAAACACCTGTGCCAATG)
(1,CTGCAGTCAATTCCCAGGTCT)
(1,ATCCTCCCAAATTTTATTTAA)
(1,ATTTTTGGAAAAAAAAACAAA)
(1,CCTATACAAGTAAAACTTTAC)
(1,TAACTGATAATCCATACTATA)
(1,TCGTTTAACTGATAATCCATA)
(1,TTTTTCTTTCTTTCTTTCTCT)
(1,CTGTGCCAATGCCAAGACAGG)
(1,TAAATTCAAATTGGATATCCT)
(1,ATGAGGCACCGCGCCTGGCCA)
(1,TGTTGCCAAGAGTATAAGTTT)
(1,CCCATCTTCCCAGAGCTGCCA)
(1,TTATCACAGTTATTTAAAGCC)
(1,GGGGTTTCACCATGTTGGCCC)
(1,AGGCTGGTCTCAAACTCCTGG)
(1,TGAGCTAAGCCTTGCAGGTTG)
(1,TTCTTCAGCTTTAGGACTTGG)
(1,AGTAATATATGCTCAGTGTAA)
(1,TTGAGCTAAGCCTTGCAGGTT)
(1,TACTCTCATTGACTGTTCAAT)
(1,GGAATAATTATAGTAATAATC)
(1,GGTTTTTTTTGTTTGTTTGTT)
(1,AGGTTGAGTAGGATTATTCTA)
(1,AGCTGCCAGCCCTCACAATGC)
(1,ATGCCCAGCTAATTTTTGTAT)
(1,TTTTGGAAAAAAAAACAAACT)
(1,CTTTCTCTTTCTTTCTTTCTT)
(1,TGCCCCCATCTTCCCAGAGCT)
(1,CAAGAATCTCTACTGCCCAAC)
(1,TGGGCCCAAGTGATTTCATCT)
(1,GGTGCTGAGCAGTGGGAGGTG)
(1,TTATTTAAAGCCTTTAAAAAG)
(1,CTCCTGCTGCACTGTGAAGAG)
(1,CCTCCCCTGCTGTATGTTTCC)
(1,AGTGCTGGGACTACAGGCATG)
(1,TGATCCACCCGCCTCGGCCTC)
(1,TTCCGATTTTATTTCCCATGT)
(1,AGTAGGGCTGTCACATGGTTG)
(1,TCTTTCTTTCTTTTTCTTTCT)
(1,ATTAAACAGCTCGTTTAACTG)
(1,AGCTCGTTTAACTGATAATCC)
(1,CTCTACTGCCCAACAAGTCCC)
(1,GAGGTGCCTGTTGCCAAGAGT)
(1,TCTCTCTCCTGCTGCACTGTG)
(1,GGTTTCACCATGTTGGCCCAG)
(1,CACTATGTTGCCTAGGCTGGT)
(1,CAACTTTCCGATTTTATTTCC)
(1,GCACCGCGCCTGGCCAGGACT)
(1,CTCCTGGGCCCAAGTGATTTC)
(1,TTCTCTCTCCTGCTGCACTGT)
(1,ATAAGTTTCCTGAGGCCTCCC)
(1,GCTAAATGTACCCAAGTGTTA)
(1,CCAATGCCAAGACAGGGGTGC)
(1,GGCAAAGTGAGGGAGGAATAA)
(1,AGTTTGGCTTTCAGTTGTAAC)
(1,CTATACAAGTAAAACTTTACC)
(1,TTTTAAGGTTTTTTTTGTTTG)
(1,TATACAAGTAAAACTTTACCA)
(1,TTGAGTAGGATTATTCTAGTG)
(1,CACATGGTTGGAACCTCCGGT)
(1,AACCTCCTGGGCCCAAGTGAT)
(1,TAAATGTTGATTGTCCTATTT)
(1,GCTTTCAGTTGTAACTTTGAA)
(1,GGCTAACGGCAAAGTGAGGGA)
(1,ATTTAAAGCCTTTAAAAAGCT)
(1,CTTTCTTTCTTTCTTTCTTTT)
(1,TTATTTTTATTTTTAAGGTTT)
(1,GTACTTGAGCTAAGCCTTGCA)
(1,ACCAGCACCCAAGTCAAAAAG)
(1,GCCAGCCCTCACAATGCCAAC)
(1,ACAGGCATGAGGCACCGCGCC)
(1,AATAATCACAGTGATGACGTG)
(1,AACTCCTGGGCTCAAGTGATC)
(1,TCTCTTTCTTTCTTTCTTTCT)
(1,CTTTCTTTCTTTTTCTTTCTT)
(1,AAGTTTCCTGAGGCCTCCCAG)
(1,GAATTTAGGGAAACGATGTGC)
(1,CAAGTGATTTCATCTTATTTT)
(1,AGCTCCCACTAGGGCTGCAGT)
(1,GGAAAAAAAAACAAACTAAAC)
(1,TATAAGAGCAGCCTTATTCCT)
(1,AAGTGATTTCATCTTATTTTT)
(1,CGTTCTTCAGCTTTAGGACTT)
(1,TCTACTGCCCAACAAGTCCCT)
(1,TTTTTTGAGCATGAAAGTAAT)
(1,ATACTATATTTGAGTAGGGCT)
(1,AAGTGCTGGGACTACAGGCAT)
(1,ATAATCAGGGTGAAACACCTG)
(1,TGCAAGAATCTCTACTGCCCA)
(1,TTCTTTCTTTCTCTTTCTTTC)
(1,GTAGGATTATTCTAGTGGAAT)
(1,ATTTTTATTTTTAAGGTTTTT)
(1,TGTTCAATGCCTATACAAGTA)
(1,ACTGCCCAACAAGTCCCTACA)
(1,TTATTTCCCATGTAACAGTGT)
(1,GCAGTGGGAGGTGGTGGCCAT)
(1,GTTGTAACTTTGAATATCTTT)
(1,TAGGGCTGTCACATGGTTGGA)
(1,ATATGCTCAGTGTAAACAATT)
(1,TTTAACTGATAATCCATACTA)
(1,TTCTTTCTTTCTTTCTTTTTC)
(1,CAAGAGTATAAGTTTCCTGAG)
(1,CTTATTTTTGGAAAAAAAAAC)
(1,TTCCTCCCCTGCTGTATGTTT)
(1,TTTAATAAATGTTGATTGTCC)
(1,TGAATATCTTTATCACAGTTA)
(1,CCCAGGTCTTAGGTGCTGAGC)
(1,CATATGGCTAACGGCAAAGTG)
(1,GCTAACGGCAAAGTGAGGGAG)
(1,ACTAGGGCTGCAGTCAATTCC)
(1,CTTTTTCTTTCTTTCTTTCTT)
(1,TTATTTTTAAGGTTTTTTTTG)
(1,GTGCCAATGCCAAGACAGGGG)
(1,GCCTAGGCTGGTCTCAAACTC)
(1,GCCCAACAAGTCCCTACAAGA)
(1,AGCCTTATTCCTATTTATAAT)
(1,CTACAGGCATGAGGCACCGCG)
(1,TTGCAGGTTGAGTAGGATTAT)
(1,TCAAATTGGATATCCTCCCAA)
(1,CCTCCCAAATTTTATTTAAGC)
(1,CCCCATCTTCCCAGAGCTGCC)
(1,CGGCCTCCCAAAGTGCTGGGA)
(1,ACAATGCCAACAGCTAAATGT)
(1,TTGTAGAAACGGGGTTTCACC)
(1,CAGCTAAATGTACCCAAGTGT)
(1,GCACTGTGAAGAGGTGCCTGT)
(1,ATAGTACTTGAGCTAAGCCTT)
(1,TCTAGTGGAATTTAGGGAAAC)
(1,AAAGTGAGGGAGGAATAATTA)
(1,TAATTATAGTAATAATCACAG)
(1,CCAAGTGATTTCATCTTATTT)
(1,TAACTTTGAATATCTTTATCA)
(1,AAATTGGATATCCTCCCAAAT)
(1,TTCCCAGGTCTTAGGTGCTGA)
(1,CTTTATCACAGTTATTTAAAG)
(1,TCCCAAAGTGCTGGGACTACA)
(1,ATCTTATTTTTGGAAAAAAAA)
(1,TCTCCTGCTGCACTGTGAAGA)
(1,GTCTCGCTCCACCGCCCAGAC)
(1,CTACTCTCATTGACTGTTCAA)
(1,TTTTTCTTTCTTTCTTTCTTT)
(1,TTTGTTTGTTTTGAGATGGAG)
(1,CCACTAGGGCTGCAGTCAATT)
(1,TCAGTGTAAACAATTAGGTCA)
(1,GTCTTAGGTGCTGAGCAGTGG)
(1,TGTATGTTTCCTGCCCTCAAA)
(1,TATTTCCCATGTAACAGTGTT)
(1,CCTGTTGCCAAGAGTATAAGT)
(1,ATGTTGCCTAGGCTGGTCTCA)
(1,AGCAGCCTTATTCCTATTTAT)
(1,AGAGGTGCCTGTTGCCAAGAG)
(1,CATCTTATTTTTGGAAAAAAA)
(1,TCTTCCTCCCCTGCTGTATGT)
(1,CCTTGCAAGAATCTCTACTGC)
(1,CTTTCTTTCTTTCTCTTTCTT)
(1,CACAATGCCAACAGCTAAATG)
(1,ATTTATAATCAGGGTGAAACA)
(1,CTTTGAATATCTTTATCACAG)
(1,TCTCAACTTTCCGATTTTATT)
(1,TGTTTGTTTGTTTTGAGATGG)
(1,ACTCCACGTTCTTCAGCTTTA)
(1,GACTACAGGCATGAGGCACCG)
(1,GTTCAATGCCTATACAAGTAA)
(1,AGCTAATTTTTGTATTTTTGT)
(1,CTGCACTGTGAAGAGGTGCCT)
(1,CTCACTGCAGCCTCAACCTCC)
(1,CTCAGGTGATCCACCCGCCTC)
(1,TCCTGCTGCACTGTGAAGAGG)
(1,GCTCCACCGCCCAGACTGGAG)
(1,AAAAAAAAACAAACTAAACCA)
(1,TGCCTATACAAGTAAAACTTT)
(1,TTTTTGTTTGTTTGTTTTGAG)
(1,TTTTATTTCCCATGTAACAGT)
(1,AAGAATCTCTACTGCCCAACA)
(1,GAATATCTTTATCACAGTTAT)
(1,TGGCTTTCAGTTGTAACTTTG)
(1,AGTGATCCATCTCTGCCTTCC)
(1,GGCTGCAGTCAATTCCCAGGT)
(1,AAATTAAACAGCTCGTTTAAC)
(1,CCCAACAAGTCCCTACAAGAT)
(1,ATATTTGAGTAGGGCTGTCAC)
(1,TTATAGTAATAATCACAGTGA)
(1,CTGTGAAGAGGTGCCTGTTGC)
(1,TTCCTATTTATAATCAGGGTG)
(1,GGCACTGTCCCTGTAAATTCA)
(1,CTCAACCTCCTGGGCCCAAGT)
(1,TGCTGAGCAGTGGGAGGTGGT)
(1,CCCCCATCTTCCCAGAGCTGC)
(1,TAAGCCTTGCAGGTTGAGTAG)
(1,TATGGCTAACGGCAAAGTGAG)
(1,ACACTCCACGTTCTTCAGCTT)
(1,ACTTTACCAGCACCCAAGTCA)
(1,TGGTCTCAAACTCCTGGGCTC)
(1,CAAATTTTATTTAAGCAATTG)
(1,AAGCCTTGCAGGTTGAGTAGG)
(1,GTGCTGAGCAGTGGGAGGTGG)
(1,AAAGCCTTTAAAAAGCTTTAA)
(1,CGATTTTATTTCCCATGTAAC)
(1,TTTTGAGATGGAGTCTCGCTC)
(1,TCACAGTTATTTAAAGCCTTT)
(1,CTAACGGCAAAGTGAGGGAGG)
(1,ATTCAAATTGGATATCCTCCC)
(1,GGCACCGCGCCTGGCCAGGAC)
(1,ATTTTTAAGGTTTTTTTTGTT)
(1,GTGAAACACCTGTGCCAATGC)
(1,TTTCTTTCTTTCTTTCTTTTT)
(1,GATATCCTCCCAAATTTTATT)
(1,ACTGTCCCTGTAAATTCAAAT)
(1,GAAACGGGGTTTCACCATGTT)
(1,TTTCCGATTTTATTTCCCATG)
(1,TCCATACTATATTTGAGTAGG)
(1,TCTTTATCACAGTTATTTAAA)
(1,TTCCTGCCCTCAAACATCACA)
(1,TGCATATGGCTAACGGCAAAG)
(1,TAGTAATAATCACAGTGATGA)
(1,CGCTCCACCGCCCAGACTGGA)
(1,CCCAAGTCAAAAAGAAAAAAA)
(1,CTGCCCAACAAGTCCCTACAA)
(1,ATTTTATTTCCCATGTAACAG)
(1,GAGTAGGGCTGTCACATGGTT)
(1,CAAGATAGTACTTGAGCTAAG)
(1,CCTCCTGGGCCCAAGTGATTT)
(1,CAGCCTCAACCTCCTGGGCCC)
(1,CCACCATCATGCCCAGCTAAT)
(1,CTGCCAGCCCTCACAATGCCA)
(1,TCCCTGTAAATTCAAATTGGA)
(1,CAATTCCCAGGTCTTAGGTGC)
(1,CAAATTGGATATCCTCCCAAA)
(1,TTGGCACTGTCCCTGTAAATT)
(1,CACTGTCCCTGTAAATTCAAA)
(1,CTAACATAATTGGCACTGTCC)
(1,GAGGGAGGAATAATTATAGTA)
(1,TCCCAAATTTTATTTAAGCAA)
(1,TTGAATATCTTTATCACAGTT)
(1,CAAGATGGCATTTAAAAGCAG)
(1,CCCTGTAAATTCAAATTGGAT)
(1,GTATCTTCCTCCCCTGCTGTA)
(1,CTCGGCCTCCCAAAGTGCTGG)
(1,AATCCATACTATATTTGAGTA)
(1,CCTGGGCTCAAGTGATCCATC)
(1,CAGGCACCCACCATCATGCCC)
(1,ACATGGTTGGAACCTCCGGTT)
(1,TACAAGTAAAACTTTACCAGC)
(1,CACAGTTATTTAAAGCCTTTA)
(1,AGTCCCTACAAGATGGCATTT)
(1,CATGCATATGGCTAACGGCAA)
(1,CCAAGAGTATAAGTTTCCTGA)
(1,GCATTTAAAAGCAGTCCCTCA)
(1,CAGGGTGAAACACCTGTGCCA)
(1,CTTGAGCTAAGCCTTGCAGGT)
(1,ATAATTATAGTAATAATCACA)
(1,GCTCAGGCCTTGCAAGAATCT)
(1,ATCCATCTCTGCCTTCCAAAG)
(1,AGTCAATTCCCAGGTCTTAGG)
(1,CTTCCTCCCCTGCTGTATGTT)
(1,ATTATTCTAGTGGAATTTAGG)
(1,GAAAGTAATATATGCTCAGTG)
(1,AGCCCTCACAATGCCAACAGC)
(1,CTCAAACTCCTGGGCTCAAGT)
(1,TTCTAGTGGAATTTAGGGAAA)
(1,CTCTTTCTTTCTTTCTTTCTT)
(1,AACACCTGTGCCAATGCCAAG)
(1,CTGTTCAATGCCTATACAAGT)
(1,AGTCAAAAAGAAAAAAAAGGG)
(1,GATAGTACTTGAGCTAAGCCT)
(1,CACTGTGAAGAGGTGCCTGTT)
(1,TTTGTTTTGAGATGGAGTCTC)
(1,ACCCGCCTCGGCCTCCCAAAG)
(1,TAATAAATGTTGATTGTCCTA)
(1,AGGTTTTTTTTGTTTGTTTGT)
(1,TGAAACACCTGTGCCAATGCC)
(1,AGCATGAAAGTAATATATGCT)
(1,TTATTCTAGTGGAATTTAGGG)
(1,GGGACTACAGGCATGAGGCAC)
(1,TTGTTTTGAGATGGAGTCTCG)
(1,AGCACCCAAGTCAAAAAGAAA)
(1,GCACCCACCATCATGCCCAGC)
(1,GATCCATCTCTGCCTTCCAAA)
(1,TTGAGTAGGGCTGTCACATGG)
(1,AATTTTTGTATTTTTGTAGAA)
(1,TACTTGAGCTAAGCCTTGCAG)
(1,TCATTCTCTCTCCTGCTGCAC)
(1,CAAGTGATCCATCTCTGCCTT)
(1,AGGCACCCACCATCATGCCCA)
(1,CTAGGGCTGCAGTCAATTCCC)
(1,AGGGCTGTCACATGGTTGGAA)
(1,AGTAAAACTTTACCAGCACCC)
(1,ACTATATTTGAGTAGGGCTGT)
(1,AGGCCTTGCAAGAATCTCTAC)
(1,TGTCCTATTTAATTATTCTCA)
(1,GTGTAAACAATTAGGTCATTA)
(1,AACAGCTCGTTTAACTGATAA)
(1,GAGTAGGATTATTCTAGTGGA)
(1,GTTTCACCATGTTGGCCCAGC)
(1,TTGCCTAGGCTGGTCTCAAAC)
(1,GTCACATGGTTGGAACCTCCG)
(1,GAGCTGCCAGCCCTCACAATG)
(1,GGTGCCTGTTGCCAAGAGTAT)
(1,TTTCAGTTGTAACTTTGAATA)
(1,GTTTGTTTTGAGATGGAGTCT)
(1,TAATATATGCTCAGTGTAAAC)
(1,AACTTTACCAGCACCCAAGTC)
(1,AGGATTATTCTAGTGGAATTT)
(1,AAAGTAATATATGCTCAGTGT)
(1,GTGATCCACCCGCCTCGGCCT)
(1,ATATGGCTAACGGCAAAGTGA)
(1,CCCAAAGTGCTGGGACTACAG)
(1,TGGATATCCTCCCAAATTTTA)
(1,TCAATGCCTATACAAGTAAAA)
(1,TTTGAGATGGAGTCTCGCTCC)
(1,GTCCCTGTAAATTCAAATTGG)
(1,CTAAATGTACCCAAGTGTTAC)
(1,TTCTCTTTCTTTCTTTCTTTC)
(1,TGTATTTTTGTAGAAACGGGG)
(1,AAGTAAAACTTTACCAGCACC)
(1,ACTTTCCGATTTTATTTCCCA)
(1,TTTCTCTTTCTTTCTTTCTTT)
(1,ATGCCAACAGCTAAATGTACC)
(1,CAGGTGATCCACCCGCCTCGG)
(1,TAATAATCACAGTGATGACGT)
(1,TCAGGCCTTGCAAGAATCTCT)
(1,CTTTCTTTCTCTTTCTTTCTT)
(1,AATTATAGTAATAATCACAGT)
(1,TCACTATGTTGCCTAGGCTGG)
(1,TGAGTAGGGCTGTCACATGGT)
(1,CCAGCTAATTTTTGTATTTTT)
(1,TATTTATAATCAGGGTGAAAC)
(1,CTATTTAATTATTCTCAACTT)
(1,TATTTTTGTAGAAACGGGGTT)
(1,TAGGATTATTCTAGTGGAATT)
(1,AAATGTTGATTGTCCTATTTA)
(1,TATATTTGAGTAGGGCTGTCA)
(1,AACATAATTGGCACTGTCCCT)
(1,CTCAAACATCACACTCCACGT)
(1,TACTATATTTGAGTAGGGCTG)
(1,GCATGAAAGTAATATATGCTC)
(1,GCTGTATGTTTCCTGCCCTCA)
(1,AACAATTAGGTCATTATAAAT)
(1,TTCTTTCTTTCTTTTTCTTTC)
(1,TTAGGTGCTGAGCAGTGGGAG)
(1,CAGCCCTCACAATGCCAACAG)
(1,TTTGGCTTTCAGTTGTAACTT)
(1,ATTTTTTGAGCATGAAAGTAA)
(1,TGTAACTTTGAATATCTTTAT)
(1,TCCCTACAAGATGGCATTTAA)
(1,ATATCTTTATCACAGTTATTT)
(1,ATCATGCCCAGCTAATTTTTG)
(1,GCTGAGCAGTGGGAGGTGGTG)
(1,CTGATAATCCATACTATATTT)
(1,ATGGCATTTAAAAGCAGTCCC)
(1,TTTGAGCATGAAAGTAATATA)
(1,TCACATGGTTGGAACCTCCGG)
(1,GTGTAACTAACATAATTGGCA)
(1,AACATCACACTCCACGTTCTT)
(1,TTTGGAAAAAAAAACAAACTA)
(1,TCCGATTTTATTTCCCATGTA)
(1,TTGTTTGTTTGTTTTGAGATG)
(1,CATCATGCCCAGCTAATTTTT)
(1,TGAGGCACCGCGCCTGGCCAG)
(1,GAAAAAAAAACAAACTAAACC)
(1,AAAAAAACAAACTAAACCAAA)
(1,AATCAGGGTGAAACACCTGTG)
(1,GCCCCCATCTTCCCAGAGCTG)
(1,GTAAAACTTTACCAGCACCCA)
(1,ATCAGGGTGAAACACCTGTGC)
(1,TGTAAACAATTAGGTCATTAT)
(1,CATCACACTCCACGTTCTTCA)
(1,ACAAGTAAAACTTTACCAGCA)
(1,CCCAGAGCTGCCAGCCCTCAC)
(1,GTAGGGCTGTCACATGGTTGG)
(1,GAAACACCTGTGCCAATGCCA)
(1,AATATATGCTCAGTGTAAACA)
(1,GCTCAAGTGATCCATCTCTGC)
(1,ATTTAAAAGCAGTCCCTCACG)
(1,TGGAAAAAAAAACAAACTAAA)
(1,AATGCCAAGACAGGGGTGCCA)
(1,TCCTATTTAATTATTCTCAAC)
(1,GCATGAGGCACCGCGCCTGGC)
(1,AACGGGGTTTCACCATGTTGG)
(1,TGGGACTACAGGCATGAGGCA)
(1,CCTGCCCTCAAACATCACACT)
(1,TAAAGTTTGGCTTTCAGTTGT)
(1,ACAGCTAAATGTACCCAAGTG)
(1,TCTTCCCAGAGCTGCCAGCCC)
(1,GCTGTCACATGGTTGGAACCT)
(1,GATGGCATTTAAAAGCAGTCC)
(1,ATGGAGTCTCGCTCCACCGCC)
(1,ATCACAGTTATTTAAAGCCTT)
(1,GATTTTATTTCCCATGTAACA)
(1,GTATAAGAGCAGCCTTATTCC)
(1,AATTCCCAGGTCTTAGGTGCT)
(1,TGACTGTTCAATGCCTATACA)
(1,ATTCTCAACTTTCCGATTTTA)
(1,CACCCGCCTCGGCCTCCCAAA)
(1,TTAGGTCATTATAAATATATT)
(1,GGTGATCCACCCGCCTCGGCC)
(1,AGCCTCAACCTCCTGGGCCCA)
(1,AAGAGGTGCCTGTTGCCAAGA)
(1,GGCCCAAGTGATTTCATCTTA)
(1,GCCCTCACAATGCCAACAGCT)
(1,CATTTAAAAGCAGTCCCTCAC)
(1,AATGTACCCAAGTGTTACTGA)
(1,GTCATTATAAATATATTTAAC)
(1,AGTGGAATTTAGGGAAACGAT)
(1,CAGGTCTTAGGTGCTGAGCAG)
(1,AGAGTATAAGTTTCCTGAGGC)
(1,GTTTGGCTTTCAGTTGTAACT)
(1,AAGAGCAGCCTTATTCCTATT)
(1,ACAGCTCGTTTAACTGATAAT)
(1,GGCTGTCACATGGTTGGAACC)
(1,TGCAGCCTCAACCTCCTGGGC)
(1,ATAAATATATTTAACAGGAAT)
(1,CTTTCAGTTGTAACTTTGAAT)
(1,ATGCTCAGTGTAAACAATTAG)
(1,GTCAATTCCCAGGTCTTAGGT)
(1,ACGTTCTTCAGCTTTAGGACT)
(1,ACAAGTCCCTACAAGATGGCA)
(1,GTTGCCTAGGCTGGTCTCAAA)
(1,TCATCTTATTTTTGGAAAAAA)
(1,ACTAACATAATTGGCACTGTC)
(1,AGCTAAATGTACCCAAGTGTT)
(1,CAGAGCTGCCAGCCCTCACAA)
(1,ACAGGCACCCACCATCATGCC)
(1,AGCCTTGCAGGTTGAGTAGGA)
(1,ATTTAATTATTCTCAACTTTC)
(1,TTGCAAGAATCTCTACTGCCC)
(1,TTCTCAACTTTCCGATTTTAT)
(1,CTCGTTTAACTGATAATCCAT)
(1,AAAATGCCCCCATCTTCCCAG)
(1,TATTTAATTATTCTCAACTTT)
(1,GAATCTCTACTGCCCAACAAG)
(1,TCCTGGGCTCAAGTGATCCAT)
(1,TGCACTGTGAAGAGGTGCCTG)
(1,CTATATTTGAGTAGGGCTGTC)
(1,GCAGTCAATTCCCAGGTCTTA)
(1,TGCCTAGGCTGGTCTCAAACT)
(1,CAAGTAAAACTTTACCAGCAC)
(1,GGCATGAGGCACCGCGCCTGG)
(1,CAGTTGTAACTTTGAATATCT)
(1,TTTATCACAGTTATTTAAAGC)
(1,TCACACTCCACGTTCTTCAGC)
(1,CATACTATATTTGAGTAGGGC)
(1,TCATGCCCAGCTAATTTTTGT)
(1,AAGTAATATATGCTCAGTGTA)
(1,CTGCTGCACTGTGAAGAGGTG)
(1,ACCCACCATCATGCCCAGCTA)
(1,TTGAGCATGAAAGTAATATAT)
(1,AAATTCAAATTGGATATCCTC)
(1,TTAACTGATAATCCATACTAT)
(1,ATAATTGGCACTGTCCCTGTA)
(1,GTCCTATTTAATTATTCTCAA)
(1,TCAGTTGTAACTTTGAATATC)
(1,TTGTTTGTTTTGAGATGGAGT)
(1,CTCCCCTGCTGTATGTTTCCT)
(1,CGTTTAACTGATAATCCATAC)
(1,ATCCACCCGCCTCGGCCTCCC)
(1,GCTCCCACTAGGGCTGCAGTC)
(1,CCTACAAGATGGCATTTAAAA)
(1,GGGCTGTCACATGGTTGGAAC)
(1,ATTTTTGTAGAAACGGGGTTT)
(1,TTTGAATATCTTTATCACAGT)
(1,TTGCCAAGAGTATAAGTTTCC)
(1,AGTAATAATCACAGTGATGAC)
(1,GCCAAGAGTATAAGTTTCCTG)
(1,TAAAAGCAGTCCCTCACGCAC)
(1,GTGATTTCATCTTATTTTTGG)
(1,CACTGCAGCCTCAACCTCCTG)
(1,CTCAGGCCTTGCAAGAATCTC)
(1,GCTCGTTTAACTGATAATCCA)
(1,CTTTCCGATTTTATTTCCCAT)
(1,ACAAGATGGCATTTAAAAGCA)
(1,TTGGAAAAAAAAACAAACTAA)
(1,CCACCCGCCTCGGCCTCCCAA)
(1,TCAAGTGATCCATCTCTGCCT)
(1,CTGTCACATGGTTGGAACCTC)
(1,CTTTACCAGCACCCAAGTCAA)
(1,GATAATCCATACTATATTTGA)
(1,CATAATTGGCACTGTCCCTGT)
(1,AATTGGATATCCTCCCAAATT)
(1,AGTGATTTCATCTTATTTTTG)
(1,CCAACAGCTAAATGTACCCAA)
(1,CATTGACTGTTCAATGCCTAT)
(1,AGCAGTGGGAGGTGGTGGCCA)
(1,CCCAGCTAATTTTTGTATTTT)
(1,TGCCTGTTGCCAAGAGTATAA)
(1,CCTCACAATGCCAACAGCTAA)
(1,AGAAACGGGGTTTCACCATGT)
(1,TGCTGGGACTACAGGCATGAG)
(1,TAAGTTTCCTGAGGCCTCCCA)
(1,GATTTCATCTTATTTTTGGAA)
(1,GGCACCCACCATCATGCCCAG)
(1,TATAAGTTTCCTGAGGCCTCC)
(1,TAATTTTTGTATTTTTGTAGA)
(1,TAAAATGCCCCCATCTTCCCA)
(1,AATTCAAATTGGATATCCTCC)
(1,AAACGGGGTTTCACCATGTTG)
(1,TGTCCCTGTAAATTCAAATTG)
(1,GAGCATGAAAGTAATATATGC)
(1,CGGCAAAGTGAGGGAGGAATA)
(1,TTAAACAGCTCGTTTAACTGA)
(1,TATGCTCAGTGTAAACAATTA)
(1,TTTCCCATGTAACAGTGTTGT)
(1,GAAGAGGTGCCTGTTGCCAAG)
(1,GGTCATTATAAATATATTTAA)
(1,TCTTAGGTGCTGAGCAGTGGG)
(1,TAGGGCTGCAGTCAATTCCCA)
(1,ACTGCAGCCTCAACCTCCTGG)
(1,TGCTGCACTGTGAAGAGGTGC)
(1,CTGGGACTACAGGCATGAGGC)
(1,GTATTTTTGTAGAAACGGGGT)
(1,CCCAAGTGATTTCATCTTATT)
(1,GCTGCAGTCAATTCCCAGGTC)
(1,TTGTATTTTTGTAGAAACGGG)
(1,AGTTATTTAAAGCCTTTAAAA)
(1,AGAGAAGCTCCCACTAGGGCT)
(1,TTATTCCTATTTATAATCAGG)
(1,CAACCTCCTGGGCCCAAGTGA)
(1,CACCATCATGCCCAGCTAATT)
(1,AGCATGCATATGGCTAACGGC)
(1,GCCTCAACCTCCTGGGCCCAA)
(1,ATTCTCTCTCCTGCTGCACTG)
(1,GATCCACCCGCCTCGGCCTCC)
(1,ATCTTCCCAGAGCTGCCAGCC)
(1,TAATTGGCACTGTCCCTGTAA)
(1,TTTTGTTTGTTTGTTTTGAGA)
(1,CCCTCAAACATCACACTCCAC)
(1,AGTTGTAACTTTGAATATCTT)
(1,ACTGATAATCCATACTATATT)
(1,TATCTTCCTCCCCTGCTGTAT)
(1,AGGGTGAAACACCTGTGCCAA)
(1,CCCAAATTTTATTTAAGCAAT)
(1,TACCAGCACCCAAGTCAAAAA)
(1,TTGGATATCCTCCCAAATTTT)
(1,CACCCACCATCATGCCCAGCT)
(1,CAAAGTGAGGGAGGAATAATT)
(1,TTTTTGTATTTTTGTAGAAAC)
(1,GTTTCCTGAGGCCTCCCAGGC)
(1,GATTGTCCTATTTAATTATTC)
(1,CAATGCCTATACAAGTAAAAC)
(1,GGGCTCAAGTGATCCATCTCT)
(1,TTAATAAATGTTGATTGTCCT)
(1,TCAAACTCCTGGGCTCAAGTG)
(1,CTTTATTTTTATTTTTAAGGT)
(1,TTTTTTGTTTGTTTGTTTTGA)
(1,TATCACAGTTATTTAAAGCCT)
(1,TTACCAGCACCCAAGTCAAAA)
(1,GCCTATACAAGTAAAACTTTA)
(1,TAGCATGCATATGGCTAACGG)
(1,TTTGTAGAAACGGGGTTTCAC)
(1,ATGCATATGGCTAACGGCAAA)
(1,TGCAGGTTGAGTAGGATTATT)
(1,ACTGGGTCTCACTATGTTGCC)
(1,TATTTAAAGCCTTTAAAAAGC)
(1,TTTCTTTCTTTCTTTCTCTTT)
(1,CCTATTTAATTATTCTCAACT)
(1,GTGATCCATCTCTGCCTTCCA)
(1,CTCCACGTTCTTCAGCTTTAG)
(1,GTAAATTCAAATTGGATATCC)
(1,AATGCCCCCATCTTCCCAGAG)
(1,TTCAGTTGTAACTTTGAATAT)
(1,TGGGCTCAAGTGATCCATCTC)
(1,ACAATTAGGTCATTATAAATA)
(1,TTTCATCTTATTTTTGGAAAA)
(1,GCCTTGCAGGTTGAGTAGGAT)
(1,CATTATAAATATATTTAACAG)
(1,TTTTGAGCATGAAAGTAATAT)
(1,AAGTCAAAAAGAAAAAAAAGG)
(1,ACGGGGTTTCACCATGTTGGC)
(1,TCACAATGCCAACAGCTAAAT)
(1,CAACAGCTAAATGTACCCAAG)
(1,GCCTTGCAAGAATCTCTACTG)
(1,TCTCACTATGTTGCCTAGGCT)
(1,ATAAATGTTGATTGTCCTATT)
(1,ATTCCCAGGTCTTAGGTGCTG)
(1,CAGCCTTATTCCTATTTATAA)
(1,ACAAGATAGTACTTGAGCTAA)
(1,ACTCTCATTGACTGTTCAATG)
(1,TGTAAATTCAAATTGGATATC)
(1,AAAGTTTGGCTTTCAGTTGTA)
(1,ATAAGAGCAGCCTTATTCCTA)
(1,CAAACTCCTGGGCTCAAGTGA)
(1,CAGTGGGAGGTGGTGGCCATG)
(1,TAAGGTTTTTTTTGTTTGTTT)
(1,CATGAGGCACCGCGCCTGGCC)
(1,TCAAACATCACACTCCACGTT)
(1,CCGCCTCGGCCTCCCAAAGTG)


5.统计:
计数:经过上面的reduceByKey操作后,不同的k-mer(串)的数量为:
scala> kmers.count
res44: Long = 1087

所有k-mer出现的总次数(各计数之和)为:

scala> val sum0=for((a,b)<-kmers) yield a
warning: there were 1 deprecation warning(s); re-run with -deprecation for details
sum0: org.apache.spark.rdd.RDD[Long] = MapPartitionsRDD[18] at map at <console>:29

scala> sum0.sum
res45: Double = 1100.0


以上统计分析阶段的代码没有包含在源码中。


源码来源:

【1】 https://github.com/bigdatagenomics/adam


Adam中的源码:

【2】 org.bdgenomics.adam.rdd.read中的adamCountKmers和org.bdgenomics.adam.cli.CountReadKmersArgs 


org.bdgenomics.adam.rdd.read中的adamCountKmers:

 /**
  * Counts how often each k-mer of length `kmerLength` occurs in the sequences
  * of the reads in `rdd`.
  *
  * Reads whose sequence is null contribute nothing, and reads shorter than
  * `kmerLength` contribute no k-mers (rather than one truncated fragment).
  *
  * @param kmerLength Length k of the substrings to count.
  * @return RDD of (k-mer, number of occurrences) pairs.
  */
 def adamCountKmers(kmerLength: Int): RDD[(String, Long)] = {
    rdd.flatMap(r => {
      // Option(null) is None, so a read without a sequence yields an empty iterator
      Option(r.getSequence).iterator.flatMap(sequence =>
        // cut each read into k-mers, and attach a count of 1L
        sequence.toString
          .sliding(kmerLength)
          // sliding emits a single shorter window when the read is shorter than
          // kmerLength; that fragment is not a k-mer, so drop it
          .filter(_.length == kmerLength)
          .map(k => (k, 1L)))
    }).reduceByKey((k1: Long, k2: Long) => k1 + k2)
  }


org.bdgenomics.adam.cli.CountReadKmers(其中包含CountReadKmersArgs):


/**
 * Licensed to Big Data Genomics (BDG) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The BDG licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.bdgenomics.adam.cli

import java.util.logging.Level
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.{ SparkContext, Logging }
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.projections.{ AlignmentRecordField, Projection }
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.util.ParquetLogger
import org.bdgenomics.formats.avro.AlignmentRecord
import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }

/**
 * Companion for the `count_kmers` command: carries its name and description
 * and builds a [[CountReadKmers]] instance from raw command-line arguments.
 */
object CountReadKmers extends BDGCommandCompanion {
  val commandName = "count_kmers"
  val commandDescription = "Counts the k-mers/q-mers from a read dataset."

  /** Turns `cmdLine` into a [[CountReadKmersArgs]] via `Args4j` and wraps it in a new command. */
  def apply(cmdLine: Array[String]) = {
    val parsedArgs = Args4j[CountReadKmersArgs](cmdLine)
    new CountReadKmers(parsedArgs)
  }
}

/**
 * Command-line arguments of the `count_kmers` command.
 *
 * The fields are mutable and carry args4j annotations; three positional
 * arguments (input, output, k-mer length) are required, the two options are not.
 */
class CountReadKmersArgs extends Args4jBase with ParquetArgs {
  // Positional argument 0: path of the reads to count k-mers from.
  @Argument(required = true, metaVar = "INPUT", usage = "The ADAM, BAM or SAM file to count kmers from", index = 0)
  var inputPath: String = null
  // Positional argument 1: where the k-mer counts are written.
  @Argument(required = true, metaVar = "OUTPUT", usage = "Location for storing k-mer counts", index = 1)
  var outputPath: String = null
  // Positional argument 2: the k in "k-mer".
  @Argument(required = true, metaVar = "KMER_LENGTH", usage = "Length of k-mers", index = 2)
  var kmerLength: Int = 0
  // Optional flag: also print a histogram of the counts (off by default).
  @Args4jOption(required = false, name = "-print_histogram", usage = "Prints a histogram of counts.")
  var printHistogram: Boolean = false
  // Optional partition count; the default -1 is the sentinel CountReadKmers.run
  // checks to skip repartitioning.
  @Args4jOption(required = false, name = "-repartition", usage = "Set the number of partitions to map data to")
  var repartition: Int = -1
}

/**
 * The `count_kmers` command: loads reads, counts their k-mers, optionally
 * prints a histogram of the counts, and saves the counts as text.
 *
 * @param args Parsed command-line arguments controlling the run.
 */
class CountReadKmers(protected val args: CountReadKmersArgs) extends BDGSparkCommand[CountReadKmersArgs] with Logging {
  val companion = CountReadKmers

  /**
   * Executes the command on the given Spark context.
   *
   * @param sc Spark context used to load the reads.
   */
  def run(sc: SparkContext): Unit = {
    // Only let SEVERE messages through from the Parquet/Hadoop logger.
    ParquetLogger.hadoopLoggerLevel(Level.SEVERE)

    // Load the reads, projecting only the sequence field.
    val loadedReads: RDD[AlignmentRecord] = sc.loadAlignments(
      args.inputPath,
      projection = Some(Projection(AlignmentRecordField.sequence)))

    // -1 is the "not set" default of the -repartition option.
    val reads: RDD[AlignmentRecord] =
      if (args.repartition == -1) {
        loadedReads
      } else {
        log.info("Repartitioning reads to '%d' partitions".format(args.repartition))
        loadedReads.repartition(args.repartition)
      }

    // Count the k-mers and cache the result, since it may be consumed twice below.
    val kmerCounts = reads.adamCountKmers(args.kmerLength)
    kmerCounts.cache()

    // Histogram: for every count value, how many k-mers have it, in ascending count order.
    if (args.printHistogram) {
      val histogram = kmerCounts
        .map { case (_, count) => count.toLong }
        .countByValue()
        .toSeq
      histogram
        .sortBy { case (count, _) => count }
        .foreach(println)
    }

    // Persist the (k-mer, count) pairs as text.
    kmerCounts.saveAsTextFile(args.outputPath)
  }

}


评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值