前言
通过实现constant propagation
和live variable analysis
两种数据流分析,删除死代码(unreachable code
,dead assignment
)
Unreachable code
Control-flow Unreachable
:控制流不会到达的地方,比如return
语句后的代码
// Example of control-flow unreachable code: nothing after the return can execute.
int controlFlowUnreachable(){
int x = 1;
return x;
int z = 42;// unreachable from this statement onward
foo(z);
}
Unreachable Branch
:if
条件是个常量
// Example of an unreachable branch: with a = 1 and b = 0 the condition a > b always holds.
int unreachableBranch(){
int a = 1,b = 0,c;
if(a > b)
c = 2333;
else
c = 6666;// unreachable: the else branch is never taken
return c;
}
需要对if的条件值做常量传播分析,遍历CFG,标记可达结点即可,其余的就为不可达。
Dead Assignment
变量被赋值却从未使用,可以被删除
// Example of dead assignments: values that are written but never read afterwards.
int deadAssign(){
int a, b, c;
a = 0;// dead assignment: overwritten by a = 1 before being read
a = 1;
b = a*2;// dead assignment: b is never read
c = 3;
return c;
}
需要做live variable analysis
,但要注意对于x = expr
,即使x是dead的也不一定能删掉:如果RHS是函数调用,函数执行可能带来其他副作用
这里再多考虑一下,如果我们的一次分析找出上述代码的两行dead assign
,将其删除后其实发现a=1
也是个dead assign
Dead Assignment
框架性的代码就不再贴出了,相信看过之前constant propagation
代码的应该没有太大问题,我们来看一些核心代码
如下得到deadAssignments
:
/**
 * Runs the live-variable analysis on {@code body}, prints the live sets before and
 * after every unit, and then prints every assignment that was classified as dead.
 *
 * <p>An assignment is reported as dead when its left-hand side is a {@link Local}
 * that is not in the live set after the statement and {@code mayHaveSideEffect}
 * returns false for it.
 *
 * @param body the method body to analyse
 */
public void LVTest(Body body) {
    BriefUnitGraph graph = new BriefUnitGraph(body);
    LiveVariableAnalysis liveness = new LiveVariableAnalysis(graph);

    System.out.print("============================= ");
    System.out.println(String.format("Liveness of method %s", body.getMethod().getSignature()));

    Set<Unit> deadAssignments = new HashSet<>();
    for (Unit unit : graph) {
        System.out.println(String.format("Before %s: %s", unit, liveness.getFlowBefore(unit)));
        System.out.println(String.format("After %s: %s", unit, liveness.getFlowAfter(unit)));

        // Only assignments to local variables are candidates for dead assignments.
        if (!(unit instanceof AssignStmt)) {
            continue;
        }
        AssignStmt assign = (AssignStmt) unit;
        Value target = assign.getLeftOp();
        if (!(target instanceof Local)) {
            continue;
        }

        LVFlowSet<Local> liveAfter = liveness.getFlowAfter(assign);
        // Keep the statement if the target is still live or the statement may have side effects.
        if (liveAfter.contains((Local) target) || mayHaveSideEffect(assign)) {
            continue;
        }
        deadAssignments.add(unit);
    }

    System.out.print("============================= ");
    System.out.println(String.format("End of Liveness of method %s", body.getMethod().getSignature()));
    deadAssignments.forEach(System.out::println);
}
LiveVariableAnalysis
package com.DeadCodeDetection;
import soot.Local;
import soot.Unit;
import soot.Value;
import soot.ValueBox;
import soot.toolkits.graph.DirectedGraph;
import soot.toolkits.scalar.BackwardFlowAnalysis;
/**
 * Live-variable analysis built on Soot's {@link BackwardFlowAnalysis}.
 *
 * <p>The flow value at each program point is the set of {@link Local}s collected
 * by the transfer function below; sets from different paths are merged by union.
 * The analysis is run from the constructor.
 */
public class LiveVariableAnalysis extends BackwardFlowAnalysis<Unit, LVFlowSet<Local>> {

    /**
     * Builds the analysis for {@code graph} and runs it immediately.
     *
     * @param graph the control-flow graph to analyse
     */
    public LiveVariableAnalysis(DirectedGraph<Unit> graph) {
        super(graph);
        super.doAnalysis();
    }

    /**
     * Transfer function: {@code out = (in - defs(unit)) + uses(unit)}.
     *
     * <p>A statement reads its operands before it writes its result, so for this
     * backward analysis the defined locals are removed first and the used locals
     * are added afterwards.
     */
    @Override
    protected void flowThrough(LVFlowSet<Local> in, Unit unit, LVFlowSet<Local> out) {
        copy(in, out);

        // kill: locals defined by this unit
        for (ValueBox box : unit.getDefBoxes()) {
            Value defined = box.getValue();
            if (defined instanceof Local) {
                out.remove((Local) defined);
            }
        }

        // gen: locals used by this unit
        for (ValueBox box : unit.getUseBoxes()) {
            Value used = box.getValue();
            if (used instanceof Local) {
                out.add((Local) used);
            }
        }
    }

    /** Returns an empty set as the initial value for every node. */
    @Override
    protected LVFlowSet<Local> newInitialFlow() {
        return new LVFlowSet<>();
    }

    /** Returns an empty set as the boundary value. */
    @Override
    protected LVFlowSet<Local> entryInitialFlow() {
        return new LVFlowSet<>();
    }

    /** Merges two flow sets by storing their union into {@code dest}. */
    @Override
    protected void merge(LVFlowSet<Local> src1, LVFlowSet<Local> src2, LVFlowSet<Local> dest) {
        copy(src1.union(src2), dest);
    }

    /** Overwrites the contents of {@code dest} with the contents of {@code source}. */
    @Override
    protected void copy(LVFlowSet<Local> source, LVFlowSet<Local> dest) {
        dest.copyFrom(source);
    }
}
特别注意这是个backward
的分析,注意方向
LVFlowSet
package com.DeadCodeDetection;
import java.util.HashSet;
import java.util.Set;
public class LVFlowSet<T> {
private Set<T> delegateSet;
public LVFlowSet() {
this(new HashSet<>());
}
public LVFlowSet(Set<T> delegateSet) {
this.delegateSet = delegateSet;
}
public void copyFrom(LVFlowSet<T> source) {
delegateSet.clear();
delegateSet.addAll(source.delegateSet);
}
public LVFlowSet<T> union(LVFlowSet<T> in2) {
LVFlowSet<T> result = new LVFlowSet<>();
result.delegateSet.addAll(delegateSet);
result.delegateSet.addAll(in2.delegateSet);
return result;
}
public void remove(T v) {
delegateSet.remove(v);
}
public void add(T v) {
delegateSet.add(v);
}
@Override
public String toString() {
return delegateSet.toString();
}
public boolean contains(T local) {
return delegateSet.contains(local);
}
}
Test
// Test input for the dead-assignment detection.
int deadAssign() {
int a, b, c;
a = 0; // dead assignment
a = 1; // only becomes dead once b = a * 2 has been removed
b = a * 2; // dead assignment
c = 3;
return c;
}
可以看到最后结果
Unreachable Code
Branch
这里就先只关注if,先得到不可达的边集
/**
 * Collects the edges leaving {@code if} statements that can never be taken because
 * constant propagation evaluates the condition to a constant.
 *
 * <p>Only {@link IfStmt}s are considered. A condition value of 0 makes the jump to
 * the target unreachable; a value of 1 makes the fall-through edge unreachable.
 *
 * @param body the method body whose units are scanned
 * @param cfg  the control-flow graph the constant propagation runs on
 * @return the set of (if statement, successor) edges that are never taken
 */
private EdgeSet findUnreachableBranches(Body body, DirectedGraph<Unit> cfg) {
    // Reuse the constant propagation analysis written earlier.
    CPFlowAnalysis constantPropagation = new CPFlowAnalysis(cfg);
    EdgeSet unreachableBranches = new EdgeSet();

    for (Unit unit : body.getUnits()) {
        if (!(unit instanceof IfStmt)) {
            continue;
        }
        IfStmt ifStmt = (IfStmt) unit;
        Unit jumpTarget = ifStmt.getTarget();
        Unit fallThrough = body.getUnits().getSuccOf(ifStmt);

        // When the jump target is also the fall-through successor, both outcomes use
        // the same (from, to) edge. Marking it unreachable would cut off the only way
        // forward and wrongly flag all following code, so such statements are skipped.
        if (jumpTarget == fallThrough) {
            continue;
        }

        // Evaluate the condition with the facts that hold just before the if statement.
        CPFlowSet dataflow = constantPropagation.getFlowBefore(ifStmt);
        CPValue cpValue = dataflow.computeValue(ifStmt.getCondition());

        // Only a definite constant lets us rule out one of the two branches.
        if (cpValue == CPValue.getUndef() || cpValue == CPValue.getNAC()) {
            continue;
        }

        if (cpValue.val() == 0) {
            // Condition is false: the jump to the target is never taken.
            unreachableBranches.addEdge(ifStmt, jumpTarget);
        } else if (cpValue.val() == 1) {
            // Condition is true: the fall-through successor is never reached from here.
            unreachableBranches.addEdge(ifStmt, fallThrough);
        }
    }
    return unreachableBranches;
}
这里有几个点要多注意一下:
-
比较的规则,这个不算难(再稍微补下makeConstant函数)
-
加边的规则建议对照着Jimple文件看一个if的例子就可以了
EdgeSet部分的代码:
package com.DeadCodeDetection;
import soot.Unit;
import soot.toolkits.scalar.Pair;
import java.util.HashSet;
import java.util.Set;
/**
 * A collection of directed edges between units, each stored as a (from, to)
 * {@link Pair}. Membership is decided by the backing set, i.e. by the pair's
 * {@code equals}/{@code hashCode}.
 */
public class EdgeSet {
    private Set<Pair<Unit, Unit>> edgeSet;

    /** Creates an empty edge set. */
    public EdgeSet() {
        this(new HashSet<>());
    }

    /**
     * Creates an edge set that uses the given set as its backing storage.
     *
     * @param edgeSet the backing set of (from, to) pairs
     */
    public EdgeSet(Set<Pair<Unit, Unit>> edgeSet) {
        this.edgeSet = edgeSet;
    }

    /** Records the edge {@code from -> to}. */
    public void addEdge(Unit from, Unit to) {
        Pair<Unit, Unit> edge = edgeOf(from, to);
        edgeSet.add(edge);
    }

    /** Returns whether the edge {@code from -> to} has been recorded. */
    boolean containsEdge(Unit from, Unit to) {
        Pair<Unit, Unit> edge = edgeOf(from, to);
        return edgeSet.contains(edge);
    }

    /** Builds the pair that represents the edge {@code from -> to}. */
    private static Pair<Unit, Unit> edgeOf(Unit from, Unit to) {
        return new Pair<>(from, to);
    }
}
得到不可达的边集后,再得到不可达的代码
/**
 * Returns every unit of {@code cfg} that cannot be reached from the entry unit
 * when the edges in {@code unreachableEdgeSet} are treated as absent.
 *
 * <p>The graph is explored breadth-first from {@code getEntry(cfg)}; whatever the
 * traversal never visits is reported as unreachable.
 *
 * @param cfg                the control-flow graph to traverse
 * @param unreachableEdgeSet edges that must not be followed
 * @return the units that were never visited
 */
private Set<Unit> findUnreachableCode(DirectedGraph<Unit> cfg, EdgeSet unreachableEdgeSet) {
    Set<Unit> reached = new HashSet<>();
    Queue<Unit> worklist = new LinkedList<>();
    worklist.add(getEntry(cfg));

    while (!worklist.isEmpty()) {
        Unit current = worklist.poll();
        // add() returns false when the unit was already visited; skip it in that case.
        if (!reached.add(current)) {
            continue;
        }
        // Enqueue only the successors that are reachable over a live edge.
        for (Unit next : cfg.getSuccsOf(current)) {
            if (unreachableEdgeSet.containsEdge(current, next)) {
                continue;
            }
            worklist.add(next);
        }
    }

    // Every unit the traversal did not visit is unreachable code.
    Set<Unit> unreachableUnits = new HashSet<>();
    for (Unit unit : cfg) {
        if (!reached.contains(unit)) {
            unreachableUnits.add(unit);
        }
    }
    return unreachableUnits;
}
/**
 * Returns the first head of {@code cfg}, which is used as the traversal entry.
 *
 * <p>NOTE(review): only the first head is used; if the graph can have several
 * heads, confirm that ignoring the others is intended.
 *
 * @param cfg the control-flow graph
 * @return the first element of {@code cfg.getHeads()}
 */
private Unit getEntry(DirectedGraph<Unit> cfg) {
    List<Unit> heads = cfg.getHeads();
    return heads.get(0);
}
可以看一下下面cfg的形式,更有助于理解
考虑在前言提出的问题,所以放在一个循环里
/**
 * Finds dead assignments and unreachable code in {@code body}.
 *
 * <p>The work is done on a clone of the body. Each round rebuilds the CFG,
 * collects dead assignments and unreachable units, prints and removes them from
 * the clone, and repeats until a round adds nothing new. Repeating matters
 * because removing one dead statement can make another one dead.
 *
 * @param body the method body to analyse; it is cloned before any unit is removed
 * @return all units identified as dead or unreachable across all rounds
 */
public Set<Unit> findDeadCode(Body body) {
    // Operate on a copy of the body.
    Body workingCopy = (Body) body.clone();
    Set<Unit> result = new HashSet<>();

    int sizeBeforeRound;
    do {
        sizeBeforeRound = result.size();

        UnitGraph cfg = new BriefUnitGraph(workingCopy);
        // Live variable analysis yields the dead assignments.
        Set<Unit> deadAssignments = findDeadAssigenment(workingCopy, cfg);
        // Constant propagation yields the branch edges that are never taken.
        EdgeSet unreachableBranchEdge = findUnreachableBranches(workingCopy, cfg);
        // Graph traversal yields the units that are no longer reachable.
        Set<Unit> unreachableCode = findUnreachableCode(cfg, unreachableBranchEdge);

        result.addAll(deadAssignments);
        result.addAll(unreachableCode);

        // Print and remove everything found so far that is still in the working copy.
        String methodName = workingCopy.getMethod().getName();
        System.out.println(String.format("- - - - - Dead Code Of Method %s - - - - -", methodName));
        workingCopy.getUnits().removeIf(unit -> {
            if (!result.contains(unit)) {
                return false;
            }
            System.out.println(unit);
            return true;
        });
        System.out.println(String.format("- - - - - End of Dead Code Of Method %s - - - - -\n\n", methodName));
    } while (sizeBeforeRound != result.size());

    return result;
}
在DCTransformer
里调用该方法即可
Control flow
对于前言的例子,编译都无法通过,不过应该也不难,大致应该为findUnreachableBranches
中多加规则
TestCode
// Input fixture for the dead-code detection; the comments mark the expected findings.
public class TestCode {
// Exercises dead-assignment detection, including an assignment that only
// becomes dead after a first round of removal.
int deadAssign() {
int a, b, c;
a = 0; // dead assignment
a = 1;// also dead after one round of removal
b = a * 2; // dead assignment
c = 3;
return c;
}
// Exercises unreachable-branch detection: with a = 1 and b = 0 the condition
// a > b always holds, so the else block is never executed.
int unrechableBranch() {
int a = 1, b = 0, c;
if (a > b){
c = 2333;
System.out.println("reach");
}
else{
c = 666; // unreachable branch
System.out.println("also unreachable");
b = 1;
System.out.println(b);
return 2;
}
return c;
}
}