import weka.core.Instances; //导入方法依赖的package包/类
/**
Generate Random sample according to random seed on Desktop, each sample has the same distribution of InTrace/OutTrace
* and have SIZE instances.
*
* @param path original arff file to be sampled in path
* @param rand random seed
* @param num the number of selection
* */
public static void generateARFF(String path, int rand, int num) throws Exception{
/*** original dataset reading */
Instances data = DataSource.read(path);
data.setClassIndex(data.numAttributes()-1);
/*** randomize the dataset */
data.randomize(new Random(rand));
/*** dataIn to save instances of InTrace class */
Instances dataIn = new Instances("dataIn", InsMerge.getStandAttrs(), 1);
dataIn.setClassIndex(dataIn.numAttributes() - 1);
/*** dataOut to save instances of OutTrace class */
Instances dataOut = new Instances("dataOut", InsMerge.getStandAttrs(), 1);
dataIn.setClassIndex(dataIn.numAttributes() - 1);
/*** add OutTrace instances into dataOut */
for(int i=0; i
if(data.get(i).stringValue(data.get(i).classAttribute()).equals("OutTrace")){
dataOut.add(data.get(i));
}
}
/** add InTrace instances into dataIn */
for(int i=0; i
if(data.get(i).stringValue(data.get(i).classAttribute()).equals("InTrace")){
dataIn.add(data.get(i));
}
}
/*** get the In/Out ratio in original dataset */
int inTrace = dataIn.numInstances();
int outTrace = dataOut.numInstances();
double ratioI = inTrace*1.0/(outTrace + inTrace);
/*** expected number to select from original dataset*/
int intrace = (int) (num * ratioI);
int outtrace = num - intrace;
/** create new generated dataset train*/
Instances train = new Instances("dataIn", InsMerge.getStandAttrs(), 1);
train.setClassIndex(train.numAttributes() - 1);
/** train get X instances from dataIn*/
for(int i=0; i
train.add(dataIn.get(i));
}
/** train get Y instances from dataOut*/
for(int j=0; j
train.add(dataOut.get(j));
}
/** save the dataset in path, we save the arff into D:/Users/LEE/Desktop/New_Data/XXX.arff */
String filename = "files/generated/" + filterName(path) + rand + ".arff";
DataSink.write(filename, train);
}