hadoop@Master:~/xubo/data/testTools/se$ avocado-submit /xubo/avocado/hs2.fq /xubo/avocado/hs38DH.fa /xubo/avocado/test20160527NUMhs2snap /home/hadoop/xubo/data/testTools/se/snap-basic.properties
Using SPARK_SUBMIT=/home/hadoop/cloud/spark-1.5.2//bin/spark-submit
Command body threw exception:
java.lang.IllegalArgumentException: No input stage with name: /home/hadoop/xubo/data/testTools/se/hs2.fq
Exception in thread "main" java.lang.IllegalArgumentException: No input stage with name: /home/hadoop/xubo/data/testTools/se/hs2.fq
at org.bdgenomics.avocado.input.Input$.apply(Input.scala:57)
at org.bdgenomics.avocado.cli.Avocado$$anonfun$4.apply(Avocado.scala:208)
at org.bdgenomics.avocado.cli.Avocado$$anonfun$4.apply(Avocado.scala:208)
at org.apache.spark.rdd.Timer.time(Timer.scala:57)
at org.bdgenomics.avocado.cli.Avocado.run(Avocado.scala:207)
at org.bdgenomics.utils.cli.BDGSparkCommand$class.run(BDGCommand.scala:54)
at org.bdgenomics.avocado.cli.Avocado.run(Avocado.scala:82)
at org.bdgenomics.utils.cli.BDGCommandCompanion$class.main(BDGCommand.scala:32)
at org.bdgenomics.avocado.cli.Avocado$.main(Avocado.scala:52)
at org.bdgenomics.avocado.cli.Avocado.main(Avocado.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:674)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
问题2:
hadoop@Master:~/xubo/data/testTools$ avocado-submit /xubo/avocado/hs2.fq /xubo/avocado/hs38DH.fa /xubo/avocado/test20160527NUMhs2snap /home/hadoop/xubo/data/testTools/se/snap-basic.properties
Using SPARK_SUBMIT=/home/hadoop/cloud/spark-1.5.2//bin/spark-submit
Command body threw exception:
java.lang.IllegalArgumentException: No input stage with name: SnapInputStage
Exception in thread "main" java.lang.IllegalArgumentException: No input stage with name: SnapInputStage
at org.bdgenomics.avocado.input.Input$.apply(Input.scala:57)
at org.bdgenomics.avocado.cli.Avocado$$anonfun$4.apply(Avocado.scala:208)
at org.bdgenomics.avocado.cli.Avocado$$anonfun$4.apply(Avocado.scala:208)
at org.apache.spark.rdd.Timer.time(Timer.scala:57)
at org.bdgenomics.avocado.cli.Avocado.run(Avocado.scala:207)
at org.bdgenomics.utils.cli.BDGSparkCommand$class.run(BDGCommand.scala:54)
at org.bdgenomics.avocado.cli.Avocado.run(Avocado.scala:82)
at org.bdgenomics.utils.cli.BDGCommandCompanion$class.main(BDGCommand.scala:32)
at org.bdgenomics.avocado.cli.Avocado$.main(Avocado.scala:52)
at org.bdgenomics.avocado.cli.Avocado.main(Avocado.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:674)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
配置:
问题1的:
hadoop@Master:~/xubo/data/testTools/se$ vi snap-basic.properties
{
SnapInputStage =
{
indexDirectory = /Users/fnothaft/IdeaProjects/avocado2/avocado/chrM
snapPath = /Users/fnothaft/IdeaProjects/snap/snap
numMachines = 1
coresPerMachine = 4
}
readExplorer =
{
}
biallelicGenotyper = {
}
defPart =
{
}
#inputStage = SnapInputStage
inputStage = /home/hadoop/xubo/data/testTools/se/hs2.fq
preprocessorNames = ( );
preprocessorAlgorithms = ( );
explorerName = readExplorer;
explorerAlgorithm = ReadExplorer;
genotyperName = biallelicGenotyper;
genotyperAlgorithm = BiallelicGenotyper;
postprocessorNames = ( );
postprocessorAlgorithms = ( );
}
"snap-basic.properties" 32L, 640C 1,1 All
问题2的:
hadoop@Master:~/xubo/data/testTools$ cat snap-basic.properties
{
SnapInputStage =
{
indexDirectory = /home/xubo/xubo/data/testTools
snapPath = /home/xubo/xubo/tools/snap
numMachines = 1
coresPerMachine = 1
}
readExplorer =
{
}
biallelicGenotyper = {
}
defPart =
{
}
inputStage = SnapInputStage
preprocessorNames = ( );
preprocessorAlgorithms = ( );
explorerName = readExplorer;
explorerAlgorithm = ReadExplorer;
genotyperName = biallelicGenotyper;
genotyperAlgorithm = BiallelicGenotyper;
postprocessorNames = ( );
postprocessorAlgorithms = ( );
}
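问题1的原因:配置文件中把 inputStage 从 SnapInputStage 改成了输入文件的路径(/home/hadoop/xubo/data/testTools/se/hs2.fq)。inputStage 应填写输入阶段(input stage)的名称,而不是文件路径,所以程序报 "No input stage with name: /home/hadoop/xubo/data/testTools/se/hs2.fq"。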
问题2定位:
/**
* Licensed to Big Data Genomics (BDG) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The BDG licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.bdgenomics.avocado.input
import org.bdgenomics.formats.avro.{ AlignmentRecord, NucleotideContigFragment }
import org.apache.commons.configuration.HierarchicalConfiguration
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
object Input {

  // All input stages that can be selected by name through the "inputStage"
  // configuration key. A stage that is not listed here cannot be looked up,
  // even if the configuration file contains a section for it.
  val stages = List(AlignedReadsInputStage)

  /**
   * Looks up the input stage named by the "inputStage" configuration key and
   * runs it to obtain the read data.
   *
   * The stage name is matched against the `stageName` of every entry in
   * [[stages]]. The sub-configuration rooted at that same name is handed to
   * the selected stage.
   *
   * @param sc A SparkContext
   * @param inputPath Path to input read data.
   * @param reference Reference contig fragments; passed through unchanged to
   *                  the selected input stage.
   * @param config Configuration containing the "inputStage" key and a
   *               section named after the selected stage.
   * @return Returns an RDD of read data.
   * @throws IllegalArgumentException if no registered stage has the
   *                                  configured name.
   */
  def apply(sc: SparkContext,
            inputPath: String,
            reference: RDD[NucleotideContigFragment],
            config: HierarchicalConfiguration): RDD[AlignmentRecord] = {
    // Stage name to use; falls back to "AlignedReads" when the key is absent.
    // NOTE(review): presumably this default equals
    // AlignedReadsInputStage.stageName; confirm against that object.
    val stageName: String = config.getString("inputStage", "AlignedReads")

    stages.find(_.stageName == stageName) match {
      case Some(s: InputStage) => {
        // The stage reads its own settings from the section named after it.
        val stageConfig = config.configurationAt(stageName)
        s.apply(sc, inputPath, stageConfig, reference)
      }
      case None => {
        // List the registered names so a wrong "inputStage" value (a file
        // path, or a stage that is not registered in this build) is easy
        // to diagnose from the message alone.
        val known = stages.map(_.stageName).mkString(", ")
        throw new IllegalArgumentException(
          "No input stage with name: " + stageName +
            ". Available input stages: " + known)
      }
    }
  }
}
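原因仍在分析中。初步观察:从上面的代码可以看到,stages 列表中只注册了 AlignedReadsInputStage,没有 SnapInputStage,因此按名称查找 "SnapInputStage" 时找不到对应的输入阶段,进入 case None 分支并抛出 IllegalArgumentException。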