Day 7 HomeWork-DeadCodeDetection

最新推荐文章于 2024-07-12 16:26:30 发布

CyanM0un

最新推荐文章于 2024-07-12 16:26:30 发布

阅读量447

点赞数

分类专栏：静态分析文章标签： java

本文链接：https://blog.csdn.net/WDWAGAAFGAGDADSA/article/details/122811880

版权

静态分析专栏收录该内容

12 篇文章 6 订阅

订阅专栏

前言

通过实现constant propagation和live variable analysis两种数据流分析，删除死代码（unreachable code，dead assignment）

Unreachable code

Control-flow Unreachable：控制流不会到达的地方，比如return语句后的代码

// Illustration of control-flow unreachable code: nothing after the return can execute.
int controlFlowUnreachable(){
    int x = 1;
    return x;
    int z = 42;//unreachable from this statement onward
    foo(z);
}

Unreachable Branch：if条件是个常量

// Illustration of an unreachable branch: a > b is always true here (1 > 0),
// so the else branch can never be taken.
int unreachableBranch(){
    int a = 1,b = 0,c;
    if(a > b)
        c = 2333;
    else
        c = 6666;//unreachable
    return c;
}

需要对if的条件值做常量传播分析，遍历CFG，标记可达结点即可，其余的就为不可达。

Dead Assignment

变量被赋值却从未使用，可以被删除

// Illustration of dead assignments: values that are written but never read afterwards.
int deadAssign(){
    int a, b, c;
    a = 0;//dead assignment: overwritten by a = 1 before it is read
    a = 1;
    b = a*2;//dead assignment: b is never read
    c = 3;
    return c;
}

需要做live variable analysis。但要注意：对于 x = expr，即使x是dead的，只要RHS可能有副作用（比如是一个函数调用），这条语句也不能删掉，因为函数的执行本身还会产生其他影响

这里再多考虑一下：如果一次分析找出了上述代码中的两行dead assign，将其删除后会发现a=1也变成了dead assign

Dead Assignment

框架性的代码就不再贴出了，相信看过之前constant propagation代码的应该没有太大问题，我们来看一些核心代码

如下得到deadAssignments：

/**
 * Runs the live-variable analysis on {@code body}, prints the liveness sets
 * before and after every unit, and finally prints every dead assignment found.
 *
 * <p>An assignment is reported as dead when its left-hand side is a local that
 * is not in the flow set after the statement and {@code mayHaveSideEffect}
 * returns false for it.
 *
 * @param body the method body to analyse
 */
public void LVTest(Body body) {
    BriefUnitGraph cfg = new BriefUnitGraph(body);
    LiveVariableAnalysis liveness = new LiveVariableAnalysis(cfg);

    System.out.print("============================= ");
    String header = String.format("Liveness of method %s", body.getMethod().getSignature());
    System.out.println(header);

    Set<Unit> deadAssignments = new HashSet<>();
    for (Unit unit : cfg) {
        String before = String.format("Before %s: %s", unit, liveness.getFlowBefore(unit));
        System.out.println(before);
        String after = String.format("After %s: %s", unit, liveness.getFlowAfter(unit));
        System.out.println(after);

        // Only assignments to a local variable are candidates.
        if (!(unit instanceof AssignStmt)) {
            continue;
        }
        AssignStmt assign = (AssignStmt) unit;
        Value target = assign.getLeftOp();
        if (!(target instanceof Local)) {
            continue;
        }
        Local local = (Local) target;
        LVFlowSet<Local> liveAfter = liveness.getFlowAfter(assign);

        // Keep the statement if the local is still live, or if the statement
        // may have a side effect (e.g. it contains a call).
        if (liveAfter.contains(local) || mayHaveSideEffect(assign)) {
            continue;
        }
        deadAssignments.add(unit);
    }

    System.out.print("============================= ");
    String footer = String.format("End of Liveness of method %s", body.getMethod().getSignature());
    System.out.println(footer);
    for (Unit dead : deadAssignments) {
        System.out.println(dead);
    }
}

在这里插入图片描述

LiveVariableAnalysis

package com.DeadCodeDetection;

import soot.Local;
import soot.Unit;
import soot.Value;
import soot.ValueBox;
import soot.toolkits.graph.DirectedGraph;
import soot.toolkits.scalar.BackwardFlowAnalysis;

/**
 * Backward data-flow analysis that computes, for every unit of a graph, the
 * set of {@link Local}s recorded as live around it.
 *
 * <p>The analysis is run from the constructor, so results can be queried as
 * soon as an instance has been created.
 */
public class LiveVariableAnalysis extends BackwardFlowAnalysis<Unit, LVFlowSet<Local>> {

    /**
     * Builds the analysis for {@code graph} and immediately runs it.
     *
     * @param graph the unit graph to analyse
     */
    public LiveVariableAnalysis(DirectedGraph<Unit> graph) {
        super(graph);
        super.doAnalysis();
    }

    /**
     * Transfer function: {@code out = (in - defs(unit)) + uses(unit)}.
     *
     * <p>Definitions are removed before uses are added, so a local that is both
     * read and written by the same unit (e.g. {@code x = x + 1}) ends up in
     * {@code out}.
     */
    @Override
    protected void flowThrough(LVFlowSet<Local> in, Unit unit, LVFlowSet<Local> out) {
        copy(in, out);
        removeDefinedLocals(unit, out);
        addUsedLocals(unit, out);
    }

    /** Kill step: drops every local that {@code unit} defines from {@code flow}. */
    private void removeDefinedLocals(Unit unit, LVFlowSet<Local> flow) {
        for (ValueBox box : unit.getDefBoxes()) {
            Value defined = box.getValue();
            if (!(defined instanceof Local)) {
                continue;
            }
            flow.remove((Local) defined);
        }
    }

    /** Gen step: adds every local that {@code unit} uses to {@code flow}. */
    private void addUsedLocals(Unit unit, LVFlowSet<Local> flow) {
        for (ValueBox box : unit.getUseBoxes()) {
            Value used = box.getValue();
            if (!(used instanceof Local)) {
                continue;
            }
            flow.add((Local) used);
        }
    }

    /** Every flow value starts out as an empty set. */
    @Override
    protected LVFlowSet<Local> newInitialFlow() {
        return new LVFlowSet<>();
    }

    /** The flow value at the analysis entry is also an empty set. */
    @Override
    protected LVFlowSet<Local> entryInitialFlow() {
        return new LVFlowSet<>();
    }

    /**
     * Meet operator: {@code dest} becomes the union of {@code src1} and
     * {@code src2}. The union is built in a fresh set before being copied.
     */
    @Override
    protected void merge(LVFlowSet<Local> src1, LVFlowSet<Local> src2, LVFlowSet<Local> dest) {
        copy(src1.union(src2), dest);
    }

    /** Replaces the contents of {@code dest} with those of {@code source}. */
    protected void copy(LVFlowSet<Local> source, LVFlowSet<Local> dest) {
        dest.copyFrom(source);
    }
}

特别注意这是个backward的分析，注意方向

在这里插入图片描述

LVFlowSet

package com.DeadCodeDetection;

import java.util.HashSet;
import java.util.Set;

public class LVFlowSet<T> {

    private Set<T> delegateSet;

    public LVFlowSet() {
        this(new HashSet<>());
    }

    public LVFlowSet(Set<T> delegateSet) {
        this.delegateSet = delegateSet;
    }

    public void copyFrom(LVFlowSet<T> source) {
        delegateSet.clear();
        delegateSet.addAll(source.delegateSet);
    }

    public LVFlowSet<T> union(LVFlowSet<T> in2) {
        LVFlowSet<T> result = new LVFlowSet<>();
        result.delegateSet.addAll(delegateSet);
        result.delegateSet.addAll(in2.delegateSet);
        return result;
    }

    public void remove(T v) {
        delegateSet.remove(v);
    }

    public void add(T v) {
        delegateSet.add(v);
    }

    @Override
    public String toString() {
        return delegateSet.toString();
    }

    public boolean contains(T local) {
        return delegateSet.contains(local);
    }
}

Test

// Input for the dead-assignment detection: the two marked statements write
// values that are never read afterwards.
int deadAssign() {
    int a, b, c;
    a = 0; // dead assignment
    a = 1;
    b = a * 2; // dead assignment
    c = 3;
    return c;
}

可以看到最后结果

在这里插入图片描述

Unreachable Code

Branch

这里就先只关注if，先得到不可达的边集

/**
 * Collects the outgoing edges of {@code if} statements that can never be
 * taken because constant propagation evaluates the condition to a constant.
 *
 * @param body the method body whose units are scanned
 * @param cfg  the control-flow graph the constant propagation runs on
 * @return the set of (if-statement, successor) edges found to be unreachable
 */
private EdgeSet findUnreachableBranches(Body body, DirectedGraph<Unit> cfg) {
    // Reuse the constant propagation analysis written earlier.
    CPFlowAnalysis constants = new CPFlowAnalysis(cfg);
    EdgeSet deadEdges = new EdgeSet();

    for (Unit unit : body.getUnits()) {
        if (!(unit instanceof IfStmt)) {
            continue;
        }
        IfStmt branch = (IfStmt) unit;
        Value condition = branch.getCondition();

        // Evaluate the condition against the facts holding just before the if.
        CPFlowSet factsBefore = constants.getFlowBefore(branch);
        CPValue outcome = factsBefore.computeValue(condition);

        // Undefined or not-a-constant: both branches stay reachable.
        if (outcome == CPValue.getUndef() || outcome == CPValue.getNAC()) {
            continue;
        }

        if (outcome.val() == 0) {
            // Condition evaluates to 0: the jump to the if-target is never taken.
            deadEdges.addEdge(branch, branch.getTarget());
        } else if (outcome.val() == 1) {
            // Condition evaluates to 1: the fall-through to the next unit is never taken.
            deadEdges.addEdge(branch, body.getUnits().getSuccOf(branch));
        }
    }
    return deadEdges;
}

这里有几个点要多注意一下：

比较的规则，这个不算难（再稍微补下makeConstant函数）
加边的规则建议对照着Jimple文件看一个if的例子就可以了

EdgeSet部分的代码：

package com.DeadCodeDetection;

import soot.Unit;
import soot.toolkits.scalar.Pair;

import java.util.HashSet;
import java.util.Set;

/**
 * A set of directed edges between units, each edge stored as a
 * {@code (from, to)} {@link Pair}.
 */
public class EdgeSet {

    // NOTE(review): lookups presumably rely on Pair implementing
    // equals/hashCode by value — confirm against the Pair implementation.
    private Set<Pair<Unit, Unit>> edges;

    /** Creates an empty edge set. */
    public EdgeSet() {
        this(new HashSet<>());
    }

    /**
     * Creates an edge set backed directly by {@code edgeSet} (no copy is made).
     *
     * @param edgeSet the backing set of edges
     */
    public EdgeSet(Set<Pair<Unit, Unit>> edgeSet) {
        this.edges = edgeSet;
    }

    /** Records the edge {@code from -> to}. */
    public void addEdge(Unit from, Unit to) {
        Pair<Unit, Unit> edge = new Pair<>(from, to);
        edges.add(edge);
    }

    /** Returns whether the edge {@code from -> to} has been recorded. */
    boolean containsEdge(Unit from, Unit to) {
        Pair<Unit, Unit> probe = new Pair<>(from, to);
        return edges.contains(probe);
    }
}

得到不可达的边集后，再得到不可达的代码

/**
 * Computes the units that cannot be reached from the entry of {@code cfg}
 * when the edges in {@code unreachableEdgeSet} are ignored.
 *
 * @param cfg                the control-flow graph to traverse
 * @param unreachableEdgeSet edges that must not be followed
 * @return every unit of {@code cfg} that the traversal never visited
 */
private Set<Unit> findUnreachableCode(DirectedGraph<Unit> cfg, EdgeSet unreachableEdgeSet) {
    // Breadth-first style worklist traversal starting at the entry unit.
    Set<Unit> reached = new HashSet<>();
    Queue<Unit> worklist = new LinkedList<>();
    worklist.add(getEntry(cfg));

    while (!worklist.isEmpty()) {
        Unit current = worklist.poll();

        // add() returns false when the unit was already visited: skip it.
        if (!reached.add(current)) {
            continue;
        }

        // Enqueue only the successors connected through a reachable edge.
        for (Unit next : cfg.getSuccsOf(current)) {
            if (unreachableEdgeSet.containsEdge(current, next)) {
                continue;
            }
            worklist.add(next);
        }
    }

    // Whatever the traversal never visited is unreachable code.
    Set<Unit> unreachableUnits = new HashSet<>();
    for (Unit unit : cfg) {
        if (reached.contains(unit)) {
            continue;
        }
        unreachableUnits.add(unit);
    }
    return unreachableUnits;
}

/**
 * Returns the first head of {@code cfg}, which is used as the entry unit of
 * the reachability traversal. Any further heads are ignored.
 */
private Unit getEntry(DirectedGraph<Unit> cfg) {
    List<Unit> heads = cfg.getHeads();
    return heads.get(0);
}

可以看一下下面cfg的形式，更有助于理解
在这里插入图片描述

考虑到前面提出的问题（删除一轮dead assignment之后，可能又会出现新的dead assignment），所以把整个分析放在一个循环里，直到结果不再变化为止

/**
 * Repeatedly detects and removes dead code (dead assignments and unreachable
 * units) until a pass finds nothing new.
 *
 * <p>All removals happen on a clone of {@code body}. Each pass prints the
 * units it removes between a header and a footer line.
 *
 * @param body the method body to analyse
 * @return every unit that was classified as dead in any pass
 */
public Set<Unit> findDeadCode(Body body) {
    // Operate on a copy of the body.
    Body working = (Body) body.clone();
    Set<Unit> result = new HashSet<>();

    int sizeBeforePass;
    do {
        sizeBeforePass = result.size();

        // Rebuild the CFG: the previous pass may have removed units.
        UnitGraph cfg = new BriefUnitGraph(working);

        // Live variable analysis yields the dead assignments.
        Set<Unit> deadAssignments = findDeadAssigenment(working, cfg);

        // Constant propagation yields the branch edges that are never taken.
        EdgeSet unreachableBranchEdge = findUnreachableBranches(working, cfg);

        // Units not reachable without those edges are unreachable code.
        Set<Unit> unreachableCode = findUnreachableCode(cfg, unreachableBranchEdge);

        result.addAll(deadAssignments);
        result.addAll(unreachableCode);

        // Remove everything found so far from the working copy, printing each removed unit.
        String header = String.format("- - - - - Dead Code Of Method %s - - - - -", working.getMethod().getName());
        System.out.println(header);
        working.getUnits().removeIf(unit -> {
            if (!result.contains(unit)) {
                return false;
            }
            System.out.println(unit);
            return true;
        });
        String footer = String.format("- - - - - End of Dead Code Of Method %s - - - - -\n\n", working.getMethod().getName());
        System.out.println(footer);
    } while (sizeBeforePass != result.size()); // stop once a pass adds nothing new

    return result;
}

在DCTransformer里调用该方法即可

Control flow

对于前言的例子，编译都无法通过，不过应该也不难，大致应该为findUnreachableBranches中多加规则

TestCode

/**
 * Input program for the dead code detection. The dead assignments and the
 * unreachable branch below are intentional and must be kept as written.
 */
public class TestCode {

    // Exercises dead-assignment detection, including an assignment that only
    // becomes dead after a first round of removal.
    int deadAssign() {
        int a, b, c;
        a = 0; // dead assignment
        a = 1;// also dead once the first round has removed b = a * 2
        b = a * 2; // dead assignment
        c = 3;
        return c;
    }

    // Exercises unreachable-branch detection: a > b is always true (1 > 0),
    // so the whole else block can never execute.
    int unrechableBranch() {
        int a = 1, b = 0, c;
        if (a > b){
            c = 2333;
            System.out.println("reach");
        }
        else{
            c = 666; // unreachable branch
            System.out.println("also unreachable");
            b = 1;
            System.out.println(b);
            return 2;
        }
        return c;
    }

}