slicer4j设置了一系列的项目,用于基准测试。在Slicer4j文件夹下的benchmarks存在很多项目。
主要有 JavaSlicer benchmarks、Slicer benchmarks 和 Defects4J Programs。我们主要探究如何运行 defects4j 的项目。
在官方 readme 文档中已经写得很清楚了,执行 run_benchmarks.py 就可以了。现在就来研究研究这个 python 文件,来确定 slicer4j 运行 defects4j 项目的流程。
# Defects4J benchmark table, keyed by project directory name.
# Each value is a 6-tuple, consumed positionally by the benchmark driver as:
#   [0] jar path relative to the project directory   (-> jar_name)
#   [1] java main class plus the test class to run   (-> project_arg)
#   [2] class containing the slicing criterion       (-> sc_file)
#   [3] line number of the slicing criterion         (-> slice_line)
#   [4] method containing the slicing criterion      (-> slice_method)
#   [5] extra classpath entries for running the test (-> extra_libs)
defects4j_benchmarks = {
    "JacksonDatabind_3b": (
        "target/jackson-databind-2.4.1-SNAPSHOT.jar",
        "org.junit.runner.JUnitCore com.fasterxml.jackson.databind.deser.TestArrayDeserialization",
        "com.fasterxml.jackson.databind.ObjectMapper",
        "3062",
        "_readMapAndClose",
        "JacksonDatabind_3b/target/test-classes/:JacksonDatabind_3b/target/dependency/*",
    ),
    "Gson_4b": (
        "gson/target/gson-2.6-SNAPSHOT.jar",
        "junit.textui.TestRunner com.google.gson.stream.JsonReaderTest",
        "com.google.gson.stream.JsonReader",
        "1422",
        "checkLenient",
        "Gson_4b/gson/target/test-classes/:Gson_4b/gson/target/dependency/*",
    ),
    "JacksonCore_4b": (
        "target/jackson-core-2.5.0-SNAPSHOT.jar",
        "org.junit.runner.JUnitCore com.fasterxml.jackson.core.util.TestTextBuffer",
        "com.fasterxml.jackson.core.util.TextBuffer",
        "587",
        "expandCurrentSegment",
        "JacksonCore_4b/target/test-classes/:JacksonCore_4b/target/dependency/*",
    ),
}
# Driver loop: for every Defects4J benchmark, build it, run the unmodified
# program, then slice it with Slicer4J and with JavaSlicer.
for project, benchmark in defects4j_benchmarks.items():
    # Each entry is a 6-tuple; unpack it once instead of indexing six times.
    (jar_path, project_arg, sc_file,
     slice_line, slice_method, extra_libs) = benchmark
    # The jar path in the table is relative to the project directory.
    jar_name = f"{project}/" + jar_path

    print("====================")
    print(f"Benchmark: {project}")
    build_jar(project)

    print("********************")
    run_original(project, jar_name, project_arg, extra_libs)

    print("********************")
    print("Running Slicer4J")
    run_slicer4j(project, jar_name, project_arg, extra_libs, sc_file, slice_line)

    print("********************")
    print("Running JavaSlicer")
    run_javaslicer(project, jar_name, project_arg, extra_libs, sc_file, slice_line, slice_method)
主要分为四个步骤:build_jar、run_original、run_slicer4j 和 run_javaslicer。
def build_jar(project):
    """Build the benchmark project with Maven and copy its dependencies.

    Runs three Maven commands inside the ``project`` directory, discarding
    their output, and then returns to the original working directory.

    Args:
        project: Path of the project directory (relative to the current
            working directory, or absolute).

    Raises:
        OSError: If ``project`` cannot be entered (e.g. it does not exist).

    Note:
        The exit status of each Maven command is ignored, so a failed build
        is not reported here.
    """
    maven_commands = (
        "mvn clean package > /dev/null 2>&1",
        # NOTE(review): presumably the second packaging run (tests skipped)
        # is there so a jar is still produced when the first run fails on
        # the benchmark's failing test -- confirm with the upstream script.
        "mvn -Dmaven.test.skip=true package > /dev/null 2>&1",
        "mvn dependency:copy-dependencies > /dev/null 2>&1",
    )
    cwd = os.getcwd()
    os.chdir(f"{project}")
    try:
        for cmd in maven_commands:
            os.system(cmd)
    finally:
        # Always restore the caller's working directory, even if a command
        # invocation raises (e.g. on KeyboardInterrupt).
        os.chdir(cwd)
其中,build_jar 和 run_slicer4j 是我们比较关心的。这里 build_jar 一共执行了三条命令,其中第三条还把依赖(dependency)拷贝了出来。对 mvn 不熟悉的同学可能只会执行第一条或第二条命令,导致打包出来的结果不能用,跟着原作者的做法来就好多了。注意,defects4j 项目的编译和运行都要使用 JDK 1.8,否则会失败。
run_slicer4j 这一步的代码如下:
def run_slicer4j(project, jar_name, project_arg, extra_libs, sc_file, slice_line):
    """Instrument, execute and slice one benchmark with Slicer4J.

    The work is done by four shell commands; the elapsed time of each phase
    and the final slice size are printed to stdout:

    1. Slicer mode ``-m i``: instrument ``jar_name`` into ``results/<project>``.
    2. Run the instrumented jar with ``project_arg`` and keep only the output
       lines containing ``SLICING`` as ``trace.log``.
    3. Slicer mode ``-m g``: process the trace; afterwards
       ``trace.log_icdg.log`` is read from the output directory.
    4. Slicer mode ``-m s``: slice from the criterion found in
       ``trace.log_icdg.log``.

    Args:
        project: Benchmark name; results go to ``results/<project>``.
        jar_name: Path of the jar to instrument.
        project_arg: Java main class and arguments used to run the program.
        extra_libs: Extra classpath entries appended when running the jar.
        sc_file: Name searched for after ``FILE:`` in the criterion line.
            If falsy, the last line containing ``println`` is used instead.
        slice_line: Line number searched for after ``LINENO:``.

    Raises:
        RuntimeError: If no line in ``trace.log_icdg.log`` matches the
            slicing criterion.
    """
    import shutil  # local import: only this function needs it

    slice_file = "slice-result.log"
    out_dir = f"results/{project}"
    trace_path = f"{out_dir}/trace.log"

    # Command fragments shared by the three Slicer invocations.
    slicer_cp = (
        f"{slicer4j_dir}/target/slicer4j-jar-with-dependencies.jar"
        f":{slicer4j_dir}/target/lib/*"
    )
    slicer_main = "ca.ubc.ece.resess.slicer.dynamic.slicer4j.Slicer"
    model_args = (
        f"-sd {slicer4j_dir}/../models/summariesManual "
        f"-tw {slicer4j_dir}/../models/EasyTaintWrapperSource.txt"
    )

    # --- Phase 1: instrumentation -----------------------------------------
    start_instr = time.time()
    # Start from an empty output directory; makedirs also creates "results/"
    # when it does not exist yet.
    if os.path.isdir(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)
    instr_cmd = (
        f"java -cp \"{slicer_cp}\" {slicer_main} -m i -j {jar_name} "
        f"-o {out_dir}/ -sl {out_dir}/static-log.log "
        f"-lc {dynamic_slicing_core}/DynamicSlicingLoggingClasses/DynamicSlicingLogger.jar"
        f" > /dev/null 2>&1"
    )
    os.system(instr_cmd)
    instr_time = time.time()
    print(f"Instrumentation time (s): {instr_time-start_instr}")

    # --- Phase 2: run the instrumented jar and collect the trace ----------
    instrumented_jar = os.path.basename(jar_name).replace(".jar", "_i.jar")
    cmd = (
        f"java -cp \"{out_dir}/{instrumented_jar}:{extra_libs}\" {project_arg}"
        f" | grep \"SLICING\" > {trace_path}"
    )
    os.system(cmd)

    # A trace line containing "FIELD" replaces the entry recorded just before
    # it. Guard against such a line being the very first one, which used to
    # raise IndexError on the empty list.
    trace = []
    with open(trace_path, 'r') as f:
        for raw in f:
            if "FIELD" in raw and trace:
                trace.pop()
            trace.append(raw.rstrip())
    with open(trace_path, 'w') as f:
        f.writelines(f"{entry}\n" for entry in trace)
    run_time = time.time()
    print(f"Execution time (s): {run_time-instr_time}")

    # --- Phase 3: Slicer mode "-m g" over the trace -----------------------
    graph_cmd = (
        f"java -Xmx8g -cp \"{slicer_cp}\" {slicer_main} -m g -j {jar_name} "
        f"-t {trace_path} -o {out_dir}/ -sl {out_dir}/static-log.log "
        f"{model_args} > /dev/null 2>&1"
    )
    os.system(graph_cmd)

    # --- Phase 4: locate the slicing criterion and slice ------------------
    if sc_file:
        marker = f"LINENO:{slice_line}:FILE:{sc_file}"
        print(f"looking for {marker}")
    else:
        marker = "println"
    sc = None
    with open(f"{out_dir}/trace.log_icdg.log", 'r') as f:
        for raw in f:
            if marker in raw:
                sc = raw.rstrip()  # keep the last matching line
    if sc is None:
        raise RuntimeError(
            f"slicing criterion {marker!r} not found in "
            f"{out_dir}/trace.log_icdg.log"
        )
    # The first comma-separated field of the matching line is passed as -sp.
    line = sc.split(", ")[0]

    slice_cmd = (
        f"java -Xmx8g -cp \"{slicer_cp}\" {slicer_main} -j {jar_name} -m s "
        f"-t {trace_path} -o {out_dir}/ -sl {out_dir}/static-log.log "
        f"{model_args} -sp {line} -d > {out_dir}/{slice_file}_{line}.log 2>&1"
    )
    os.system(slice_cmd)
    slice_time = time.time()
    print(f"Slice time (s): {slice_time-run_time}")

    num_lines = count_lines_slice_slicer4j(f"{out_dir}")
    print(f"Slice size (Java LoC): {num_lines}")
其中主要执行了以下几条 shell 命令:
# Step 1 (Slicer mode "-m i"): instrument {jar_name}; output goes to {out_dir}/.
# run_slicer4j reports the time of this step as "Instrumentation time".
instr_cmd = f"java -cp \"{slicer4j_dir}/target/slicer4j-jar-with-dependencies.jar:{slicer4j_dir}/target/lib/*\" ca.ubc.ece.resess.slicer.dynamic.slicer4j.Slicer -m i -j {jar_name} -o {out_dir}/ -sl {out_dir}/static-log.log -lc {dynamic_slicing_core}/DynamicSlicingLoggingClasses/DynamicSlicingLogger.jar > /dev/null 2>&1"
# Step 2: run the instrumented jar with the test command and keep only the
# output lines containing "SLICING" as {out_dir}/trace.log.
cmd = f"java -cp \"{out_dir}/{instrumented_jar}:{extra_libs}\" {project_arg} | grep \"SLICING\" > {out_dir}/trace.log"
# Step 3 (Slicer mode "-m g"): takes trace.log via -t.
# NOTE(review): presumably this builds the dependence graph; run_slicer4j
# reads {out_dir}/trace.log_icdg.log right after this command -- confirm.
graph_cmd = f"java -Xmx8g -cp \"{slicer4j_dir}/target/slicer4j-jar-with-dependencies.jar:{slicer4j_dir}/target/lib/*\" ca.ubc.ece.resess.slicer.dynamic.slicer4j.Slicer -m g -j {jar_name} -t {out_dir}/trace.log -o {out_dir}/ -sl {out_dir}/static-log.log -sd {slicer4j_dir}/../models/summariesManual -tw {slicer4j_dir}/../models/EasyTaintWrapperSource.txt > /dev/null 2>&1"
# Step 4 (Slicer mode "-m s"): slice with {line} passed as -sp; stdout and
# stderr are redirected to {out_dir}/{slice_file}_{line}.log.
slice_cmd = f"java -Xmx8g -cp \"{slicer4j_dir}/target/slicer4j-jar-with-dependencies.jar:{slicer4j_dir}/target/lib/*\" ca.ubc.ece.resess.slicer.dynamic.slicer4j.Slicer -j {jar_name} -m s -t {out_dir}/trace.log -o {out_dir}/ -sl {out_dir}/static-log.log -sd {slicer4j_dir}/../models/summariesManual -tw {slicer4j_dir}/../models/EasyTaintWrapperSource.txt -sp {line} -d > {out_dir}/{slice_file}_{line}.log 2>&1"
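这里要想一步步搞懂也挺麻烦,应该是和它前面的介绍有关。instr_cmd 应该和工具的一些初始条件有关(从代码里打印的 "Instrumentation time" 来看,这一步是对 jar 包做插桩);接着 cmd 运行插桩后的程序,输出了一个 trace.log 文件,为下一步做准备;graph_cmd 输出了一堆东西;最后 slice_cmd 以 -sp 指定的位置为切片准则进行切片,并把输出写到 slice-result.log_{line}.log 中。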
全搞明白挺麻烦,所以最方便的方式还是使用作者提供的 py 文件来进行,我们只需要修改开头的 defects4j_benchmarks 中的某几个参数即可。
第一个参数是项目 jar 包;第二个参数是出故障的那个单元测试,要把前面的那一堆包名也加上;第三个参数是单元测试检测出来的那个可能出错的类,在报错信息里面可以找到;第四个参数就是具体出错在类的哪一行。这里建议第三个、第四个参数尽量把报错信息里面的类和行都覆盖,否则容易切片切多,把错误代码也筛出去了;第五个参数是第三个参数里面的类的具体方法;第六个参数就是依赖库。