老外写的js反混淆教程

最新推荐文章于 2025-04-18 17:45:04 发布

lacoucou

最新推荐文章于 2025-04-18 17:45:04 发布

阅读量4.3k

点赞数 3

分类专栏： js反混淆文章标签： javascript 前端 html

原文链接：https://steakenthusiast.github.io/archives/

版权

js反混淆专栏收录该内容

20 篇文章

订阅专栏

地址：

Archives-ReverseJS

7 posts in total

2022
06-14
Deobfuscating Javascript via AST: A Peculiar JSFuck-esque Case
06-04
Deobfuscating Javascript via AST: Removing Dead or Unreachable Code
05-31
Deobfuscating Javascript via AST: Replacing References to Constant Variables with Their Actual Value
05-28
Deobfuscating Javascript via AST: Constant Folding/Binary Expression Simplification
05-28
Deobfuscating Javascript via AST: Converting Bracket Notation => Dot Notation for Property Accessors
05-22
Deobfuscating Javascript via AST: Reversing Various String Concealing Techniques
05-21
An Introduction to Javascript Obfuscation & Babel

Introduction

Welcome to the first article in my series about Javascript deobfuscation. I won’t be going in-depth regarding practical deobfuscation techniques; that’ll be reserved for later articles. Rather, this post serves as a brief overview of the state of Javascript obfuscation, different methods of analysis, and provides resources to learn more about reverse engineering Javascript.

What is Obfuscation?

Definition

Obfuscation is a series of code transformations that turn human-readable code into something that is deliberately difficult for a human to understand, while (for the most part) still maintaining its original functionality. Code authors may choose to obfuscate their code for many reasons, including but not limited to:

To make it harder to modify, debug, or reproduce (e.g. some javascript-based games or programs)
To hide malicious intent (e.g. malware)
To enhance security, i.e obscuring the logic behind javascript-based challenges or fingerprinting (e.g. ReCAPTCHA, Shape Security, PerimeterX, Akamai, DataDome)

Example

For example, the obfuscation process can convert this human-readable script:

1	`console.log("Hello World!");` JS

Into something incomprehensible to humans:

var _0x3b8ba1 = _0x57e2;
function _0x57e2(_0x23db1e, _0x36111b) {
  var _0x3bbee9 = _0x5936();
  return (
    (_0x57e2 = function (_0x194e56, _0x27d4e2) {
      _0x194e56 = _0x194e56 - 0x17e;
      var _0x2b5447 = _0x3bbee9[_0x194e56];
      return _0x2b5447;
    }),
    _0x57e2(_0x23db1e, _0x36111b)
  );
}
(function (_0x3d5379, _0x27d8c9) {
  var _0x26235b = _0x57e2,
    _0x556a19 = _0x3d5379();
  while (!![]) {
    try {
      var _0x15999f =
        parseInt(_0x26235b(0x183)) / 0x1 +
        parseInt(_0x26235b(0x185)) / 0x2 +
        (parseInt(_0x26235b(0x194)) / 0x3) *
          (-parseInt(_0x26235b(0x18d)) / 0x4) +
        parseInt(_0x26235b(0x188)) / 0x5 +
        (-parseInt(_0x26235b(0x18b)) / 0x6) *
          (-parseInt(_0x26235b(0x187)) / 0x7) +
        -parseInt(_0x26235b(0x182)) / 0x8 +
        -parseInt(_0x26235b(0x195)) / 0x9;
      if (_0x15999f === _0x27d8c9) break;
      else _0x556a19["push"](_0x556a19["shift"]());
    } catch (_0x3cc29a) {
      _0x556a19["push"](_0x556a19["shift"]());
    }
  }
})(_0x5936, 0x4bc84);
var _0x5cff45 = (function () {
    var _0x5a2bb8 = !![];
    return function (_0x2e90c1, _0x495f04) {
      var _0x1ac9d1 = _0x5a2bb8
        ? function () {
            var _0x261249 = _0x57e2;
            if (_0x495f04) {
              var _0x3c800c = _0x495f04[_0x261249(0x198)](_0x2e90c1, arguments);
              return (_0x495f04 = null), _0x3c800c;
            }
          }
        : function () {};
      return (_0x5a2bb8 = ![]), _0x1ac9d1;
    };
  })(),
  _0x1ea628 = _0x5cff45(this, function () {
    var _0x4f765e = _0x57e2;
    return _0x1ea628[_0x4f765e(0x17f)]()
      ["search"](_0x4f765e(0x18e))
      ["toString"]()
      ["constructor"](_0x1ea628)
      [_0x4f765e(0x192)](_0x4f765e(0x18e));
  });
_0x1ea628();
function _0x5936() {
  var _0x7289e8 = [
    "Hello\x20World!",
    "toString",
    "log",
    "__proto__",
    "2888432EGELDh",
    "516645rknrWL",
    "trace",
    "928870xUjHrE",
    "error",
    "27965akgdka",
    "2813765Wufwlg",
    "return\x20(function()\x20",
    "warn",
    "48zUcTLM",
    "bind",
    "2668xZhNIu",
    "(((.+)+)+)+$",
    "prototype",
    "console",
    "table",
    "search",
    "length",
    "615NtfKnc",
    "6908400qvcpUL",
    "exception",
    "constructor",
    "apply",
  ];
  _0x5936 = function () {
    return _0x7289e8;
  };
  return _0x5936();
}
var _0x27d4e2 = (function () {
    var _0x494152 = !![];
    return function (_0x2d8431, _0x2bbb6a) {
      var _0x1528ad = _0x494152
        ? function () {
            if (_0x2bbb6a) {
              var _0x4f8607 = _0x2bbb6a["apply"](_0x2d8431, arguments);
              return (_0x2bbb6a = null), _0x4f8607;
            }
          }
        : function () {};
      return (_0x494152 = ![]), _0x1528ad;
    };
  })(),
  _0x194e56 = _0x27d4e2(this, function () {
    var _0x2df84e = _0x57e2,
      _0x50a5eb;
    try {
      var _0x458538 = Function(
        _0x2df84e(0x189) + "{}.constructor(\x22return\x20this\x22)(\x20)" + ");"
      );
      _0x50a5eb = _0x458538();
    } catch (_0x55824d) {
      _0x50a5eb = window;
    }
    var _0x22e34f = (_0x50a5eb[_0x2df84e(0x190)] =
        _0x50a5eb[_0x2df84e(0x190)] || {}),
      _0x4b7f35 = [
        _0x2df84e(0x180),
        _0x2df84e(0x18a),
        "info",
        _0x2df84e(0x186),
        _0x2df84e(0x196),
        _0x2df84e(0x191),
        _0x2df84e(0x184),
      ];
    for (
      var _0x24f5c9 = 0x0;
      _0x24f5c9 < _0x4b7f35[_0x2df84e(0x193)];
      _0x24f5c9++
    ) {
      var _0x126b34 =
          _0x27d4e2[_0x2df84e(0x197)][_0x2df84e(0x18f)][_0x2df84e(0x18c)](
            _0x27d4e2
          ),
        _0x427a50 = _0x4b7f35[_0x24f5c9],
        _0xdec475 = _0x22e34f[_0x427a50] || _0x126b34;
      (_0x126b34[_0x2df84e(0x181)] = _0x27d4e2[_0x2df84e(0x18c)](_0x27d4e2)),
        (_0x126b34[_0x2df84e(0x17f)] =
          _0xdec475["toString"][_0x2df84e(0x18c)](_0xdec475)),
        (_0x22e34f[_0x427a50] = _0x126b34);
    }
  });
_0x194e56(), console[_0x3b8ba1(0x180)](_0x3b8ba1(0x17e));

Yet, believe it or not, both of these scripts have the exact same functionality! You can test it yourself: both scripts output

1	`Hello World!` EBNF

to the console.

The State of Javascript Obfuscation

There are many available javascript obfuscators, both closed and open-source. Here’s a small list:

Open-Source

Closed-Source

For further reading on the why and how’s of Javascript Obfuscation, I recommend checking out the Jscrambler blog posts. For now, though, I’ll shift the topic towards reverse engineering.

How is Obfuscated Code Analyzed?

In general, most reverse engineering/deobfuscation techniques fall under two categories: static analysis and dynamic analysis

Static Analysis

Static analysis refers to the inspection of source code without actually executing the program. An example of static analysis is simplifying source code with Regex.

Dynamic Analysis

Dynamic analysis refers to the testing and analysis of an application during run time/evaluation. An example of dynamic analysis is using a debugger.

Static vs. Dynamic Analysis Use-Cases

Since static analysis does not execute code, it makes it ideal for analyzing untrusted scripts. For example, when analyzing malware, you may want to use static analysis to avoid infection of your computer.

Dynamic analysis is used when a script is known to be safe to run. Debuggers can be powerful tools for reverse engineering, as they allow you to view the state of the program at different points in the runtime. Additionally, dynamic analysis can be (and often is) used for malware analysis too, but only after taking proper security precautions (i.e sandboxing).

Static and dynamic analysis are powerful when used together. For example, debugging a script containing a lot of junk code can be difficult. Or, the code may contain anti-debugging protection (e.g. infinite debugger loops). In this case, someone may first use static inspection of source code to simplify the source code, then proceed with dynamic analysis using the modified source.

Introducing Babel

Babel is a Javascript to Javascript compiler. The functionalities included with the Babel framework make it exceptionally useful for any javascript deobfuscation use case, since you can use it for static analysis and dynamic analysis!

Let me give a short explanation of how it works:

Javascript is an interpreted programming language. For Javascript to be interpreted by an engine (e.g. Chrome’s V8 engine or Firefox’s Spidermonkey) into machine code, it is first parsed into an Abstract Syntax Tree (AST). After that, the AST is used to generate machine-readable byte-code, which is then executed.

Babel works in a similar fashion. It takes in Javascript code, parses it into an AST, then outputs javascript based on that AST.

Okay, sounds interesting. But what even is an AST?

Definition: Abstract Syntax Tree

An Abstract Syntax Tree (AST) is a tree-like structure that hierarchically represents the syntax of a piece of source code. Each node of the tree represents the occurrence of a predefined structure in the source code. Any piece of source code, from any programming language, can be represented as an AST.

Note: Even though the concepts behind an AST are universal, different programming languages may have different AST specifications based on their capabilities.

Some practical uses of ASTs include:

Validating Code
Formatting Code
Syntax Highlighting

And, of course, due to the more verbose nature of ASTs relative to plaintext source code, it makes them a great tool for reverse engineering 😁

Unfortunately, I won’t be giving a more in-depth definition of ASTs. This is for the sake of time, and since that’d be more akin to the subject of compiler theory than deobfuscation. I’d prefer to get right into explaining the usage of Babel as quickly as possible. However, I’ll leave you with some resources to read up more about ASTs (which probably offer a better explanation than I could muster anyway):

Wikipedia - Abstract Syntax Trees
How JavaScript works: Parsing, Abstract Syntax Trees (ASTs) + 5 tips on how to minimize parse time

How Babel Works

Babel can be installed the same way as any other NodeJS package. For our purposes, the following packages are relevant:

@babel/core This encapsulates the entire Babel compiler API.
@babel/parser The module Babel uses to parse Javascript source code and generate an AST
@babel/traverse The module that allows for traversing and modifying the generated AST
@babel/generator The module Babel uses to generate Javascript code from the AST.
@babel/types A module for verifying and generating node types as defined by the Babel AST implementation.

When compiling code, Babel goes through three main phases:

Parsing => Uses @babel/parser API
Transforming => Uses @babel/traverse API
Code Generation => Uses @babel/generator API

I’ll give you a (very) short summary of each of these phases:

Stages of Babel

Phase #1: Parsing

During this phase, Babel takes source code as an input and outputs an AST. Two stages of parsing are Lexical Analysis and Syntactic Analysis.

To parse code into an AST, we make use of @babel/parser. The following is an example of parsing code from a file, sourcecode.js:

1
2
3

// `fs` must be required explicitly; it is not a global in NodeJS.
const fs = require("fs");
const parser = require("@babel/parser");
// Read the target file as UTF-8 text so the parser receives a string, not a Buffer.
const code = fs.readFileSync("sourcecode.js", "utf-8");
// Parse the source text into a Babel AST.
let ast = parser.parse(code);

JAVASCRIPT

You can read more about the parsing phase here:
Babel Plugin Handbook - Parsing
Babel Docs - @babel/parser

Phase 2: Transforming

The transformation phase is the most important phase. During this phase, Babel takes the generated AST and traverses it to add, update, or remove nodes. All the deobfuscation transformations we write are executed in this stage. This stage will be the main focus of future tutorials.

Phase 3: Code Generation

The code generation phase takes in the final AST and converts it back to executable Javascript.

The Babel Workflow

This section will not discuss any practical deobfuscation techniques. It will only detail the general process of analyzing source code. I’ll be using an unobfuscated piece of code as an example.

When deobfuscating Javascript, I typically follow this workflow:

Visualization
Analysis
Writing the Deobfuscator

Phase 1: Visualization with AST Explorer

Before we can write any plugins for a deobfuscator, we should always first visualize the code’s AST. To help us with that, we will leverage an online tool: AstExplorer.net.

AST Explorer serves as an interactive AST playground. It allows you to choose a programming language and parser. In this case, we would select Javascript as the programming language and @babel/parser as the parser. After that, we can paste some source code into the window and inspect the generated AST on the right-hand side.

As an example, I’ll use this snippet:

// Adds its two arguments; the `arg1 + arg2` BinaryExpression is the node we will target.
function operation(arg1, arg2) {
  return arg1 + arg2;
}

let foo = operation(6, 8);

JAVASCRIPT

正在上传…重新上传取消

Result from pasting the code snippet in AST Explorer

The generated AST looks like this:

Click to Expand

We can observe that even for this small little program, the AST representation is incredibly verbose. It’s composed of different types of nodes (FunctionDeclarations, ExpressionStatements, Identifiers, CallExpressions, etc.), and many nodes also have a sub node. To transform the AST, we’ll be making use of the Babel traverse package to recursively traverse the tree and modify nodes.

Phase 2: Coming Up With The Transformation Logic/Pseudo-code

This isn’t an obfuscated file, but we’ll still write a plugin to demonstrate the traverse package’s functionality.

Let’s assign ourselves an arbitrary goal of transforming the script to replace all occurrences of arithmetic addition operators (+) with arithmetic multiplication operators (*). That is, the final script should look like this:

function operation(arg1, arg2) {
  return arg1 * arg2;
}

let foo = operation(6, 8);

JAVASCRIPT

Determining the Target Node Type(s)

First, we need to determine what our node type(s) of interest are. If we highlight a section of the code, AST explorer will automatically expand that node on the right-hand side. In our case, we want to focus on the arg1 + arg2 operation. After highlighting that piece of code, we’ll see this:

正在上传…重新上传取消

A closer look at the nodes of interest

We can see that arg1 + arg2 has been parsed into a BinaryExpression node. This node has the following properties:

type stores the node’s type, in this case: BinaryExpression
left stores the information for the left side of the expression, in this case: the arg1 identifier.
right stores the information for the right side of the expression, in this case: the arg2 identifier.
operator stores the operator, in this case: +.

Our goal is to replace all + operators in the script with a * operator, so it makes sense that our node type of interest is a BinaryExpression.

Now that we have our target node type, we need to figure out how we’ll transform them

Transformation Logic

To reiterate: we know that we’re looking for BinaryExpressions. Each BinaryExpression has a property, operator. We want to edit this property to * if it is a +.

The logical process would therefore look like this:

Parse the code to generate an AST.
Traverse the AST in search of BinaryExpressions.
If one is encountered, check that its operator is currently equal to +. If it isn’t, skip that node.
If the operator is equal to +, set the operator to *.

Now that we understand the logic, we can write it as code

Phase 3: Writing the Transformation Code

To parse the tree, we will use the @babel/parser package as previously demonstrated. To traverse the generated AST and modify the nodes, we’ll make use of @babel/traverse.

To target a specific node type during traversal, we’ll use a visitor (https://github.com/jamiebuilds/babel-handbook/blob/master/translations/en/plugin-handbook.md#visitors).

From the Babel Plugin Handbook:

Visitors are a pattern used in AST traversal across languages. Simply put they are an object with methods defined for accepting particular node types in a tree.

To target nodes of type BinaryExpression, our visitor would look like this:

// Visitor object: each method is named after the node type it handles and is
// called every time a node of that type is encountered during traversal.
const changeOperatorVisitor = {
  // `path` is the path to the visited node; the node itself is `path.node`.
  BinaryExpression(path) {
    // transformations here ...
  },
};

JAVASCRIPT

Now, every time a BinaryExpression is encountered, the BinaryExpression(path) method will be called.

Inside the BinaryExpression(path) method of our visitor, we can add code for any checks and transformations.

Each visitor method takes in a parameter, path, which holds the path to the node being visited. To access the actual properties of the node, we must use path.node.

Our first step in our transformation would be to check that the operator property of the node is a +. We can do that like this:

// Visitor that (for now) only filters BinaryExpression nodes by operator.
const changeOperatorVisitor = {
  // `path` is the path to the visited node; the node itself is `path.node`.
  BinaryExpression(path) {
    // Strict comparison: the operator must be exactly the string "+".
    if (path.node.operator === "+") {
      // continue with transformations...
    } else {
      return; // Skip the node
    }
  },
};

JAVASCRIPT

If it is a +, we can set it to *.

// Visitor that rewrites every `+` BinaryExpression into a `*` BinaryExpression.
const changeOperatorVisitor = {
  // `path` is the path to the visited node; the node itself is `path.node`.
  BinaryExpression(path) {
    // Check if operator is exactly "+" (strict comparison, no type coercion)
    if (path.node.operator === "+") {
      // Set operator as *
      path.node.operator = "*";
    } else {
      return; // Skip the node
    }
  },
};

JAVASCRIPT

And our visitor is complete! Now we just need to call it on the generated AST. But first, let’s generate the AST:

const parser = require("@babel/parser");
const generate = require("@babel/generator").default;
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
// Set the source code.
// The input must still contain the `+` operator: it is what the visitor will
// later replace with `*`.
const code = `
function operation(arg1, arg2) {
  return arg1 + arg2;
}
let foo = operation(6, 8);
`;
// Parse the source code into an AST
let ast = parser.parse(code);

JAVASCRIPT

After that, we can paste our visitor into the source code. To traverse the AST using the visitor, we’ll use the traverse method from the @babel/traverse package. That would look like this:

const parser = require("@babel/parser");
const generate = require("@babel/generator").default;
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
// Set the source code.
// The input must still contain the `+` operator: it is what the visitor
// below replaces with `*`.
const code = `
function operation(arg1, arg2) {
  return arg1 + arg2;
}
let foo = operation(6, 8);
`;
// Parse the source code into an AST
let ast = parser.parse(code);

// Visitor for modifying operator of BinaryExpression
const changeOperatorVisitor = {
  // `path` is the path to the visited node; the node itself is `path.node`.
  BinaryExpression(path) {
    // Check if operator is exactly "+" (strict comparison, no type coercion)
    if (path.node.operator === "+") {
      // Set operator as *
      path.node.operator = "*";
    } else {
      return; // Skip the node
    }
  },
};

// Walk the AST, invoking the visitor on every BinaryExpression node.
traverse(ast, changeOperatorVisitor);

JAVASCRIPT

Finally, we’ll use the generate method from the @babel/generator package to generate the final code from the modified AST. We can also output the resulting code to a file, but I’ll just log it to the console for simplicity.

So, our final transformation script looks like this:

Babel Transformation Script

const parser = require("@babel/parser");
const generate = require("@babel/generator").default;
const traverse = require("@babel/traverse").default;
const types = require("@babel/types");
// Set the source code.
// The input must still contain the `+` operator: it is what the visitor
// below replaces with `*`.
const code = `
function operation(arg1, arg2) {
  return arg1 + arg2;
}
let foo = operation(6, 8);
`;
// Parse the source code into an AST
let ast = parser.parse(code);

// Visitor for modifying operator of BinaryExpression
const changeOperatorVisitor = {
  // `path` is the path to the visited node; the node itself is `path.node`.
  BinaryExpression(path) {
    // Check if operator is exactly "+" (strict comparison, no type coercion)
    if (path.node.operator === "+") {
      // Set operator as *
      path.node.operator = "*";
    } else {
      return; // Skip the node
    }
  },
};

// Walk the AST, invoking the visitor on every BinaryExpression node.
traverse(ast, changeOperatorVisitor);

// Turn the modified AST back into Javascript source text.
let finalCode = generate(ast).code;

console.log(finalCode);

JAVASCRIPT

This will output the following to the console:

function operation(arg1, arg2) {
  return arg1 * arg2;
}

let foo = operation(6, 8);

JAVASCRIPT

And we can see that the code has been successfully transformed to replace + operators with * operators!

Why use Babel for Deobfuscation?

So, why should we use Babel as a deobfuscation tool as opposed to other static analysis tools like Regex?

Here are a few reasons:

AST is less error-prone.
- For large chunks of code, writing regex transformations can become incredibly tedious due to the edge cases. For example, it’s difficult to account for the scope and state of variables when using regex: two different variables can share the same name if they’re in different scopes:

// Scope 1 (outer block):
{
  let foo = 123;
  // Scope 2 (inner block): this `foo` is a separate variable that shadows the outer one.
  {
    let foo = 321;
    console.log(foo); // logs 321 (inner foo)
  }
  console.log(foo); // logs 123 (outer foo)
}

JAVASCRIPT

Eventually, regular expressions will become very convoluted when you have to account for edge cases; whether it be scope or tiny variations in syntax. Babel doesn’t have this problem, as you can use built-in functionality to make transformations with respect to scope and state.

The Babel API has a lot of useful features.

Here are a few useful things you can do with the built-in Babel API:
- Easily target certain nodes
- Handle scope when renaming/replacing variables
- Easily get initial values and references of variables
- Node validation, generation, cloning, replacement, removal
- Find paths to ancestor and descendant nodes based on test conditions
- Containers/Lists: Check if a node is in a container/list, and get all of its siblings
Good for static and dynamic analysis
- Inherently, parsing the code into an AST and applying transformations will not execute the code. But Babel also has the functionality to evaluate nodes (ex. BinaryExpressions) and return their actual value. Babel can also generate code from nodes, which can be evaluated with eval or the NodeJS VM.

Conclusion + Additional Resources

That was a short demonstration of transforming a piece of code with Babel! The next articles will be more in-depth and include practical cases of reversing obfuscation techniques you might encounter in the wild.

For the sake of time, I didn’t go too deep into the behind-the-scenes of Babel or all of its API methods. In the future, I may decide to update this article or write a new one with more detailed explanations, examples, and documentation. But, I really recommend getting a solid fundamental understanding of Babel’s features before continuing on in this series. Most notably, I didn’t cover the usage of the @babel/types package in this article, but it will be utilized in future ones. I’d recommend giving these resources a look:

Official Babel Docs
Babel Plugin Handbook
Video: @babel/how-to

Here are links to the other articles in this series:

You can also view the source code for all my deobfuscation tutorial posts in this repository

Okay, that’s all I have for you today. I hope that this article helped you learn something new. Thanks for reading, and happy reversing!

-----------------------------------------------------------------------------------------------------------------------

Preface

This article assumes a preliminary understanding of Abstract Syntax Tree structure and BabelJS. Click Here to read my introductory article on the usage of Babel.

What is String Concealing?

In JavaScript, string concealing is an obfuscation technique that transforms code in a way that disguises references to string literals. After doing so, the code becomes much less readable to a human at first glance. This can be done in multiple different ways, including but not limited to:

Encoding the string as a hexadecimal/Unicode representation,
Splitting a single string into multiple substrings, then concatenating them,
Storing all string literals in a single array and referencing an element in the array when a string value is required
Using an algorithm to encrypt strings, then calling a corresponding decryption algorithm on the encrypted value whenever its value needs to be read

In the following sections, I will provide some examples of these techniques in action and discuss how to reverse them.

Examples

Example #1: Hexadecimal/Unicode Escape Sequence Representations

Rather than storing a string as a literal, an author may choose to store it as an escape sequence. The javascript engine will parse the actual string literal value of an escaped string before it is used or printed to the console. However, it’s virtually unreadable to an ordinary human. Below is an example of a sample obfuscated using this technique.

Original Source Code

/**
 * "Input.js"
 * Original, unobfuscated code.
 *
*/

class Person {
  constructor(name, school, emoji) {
    this.name = name;
    this.school = school;
    this.favEmoji = emoji;
  }
  sayHello() {
    let helloStatement =
      "Hello, my name is " +
      this.name +
      ". I go to " +
      this.school +
      " and my favourite emoji is " +
      this.favEmoji;
    console.log(helloStatement);
  }
}

const examplePerson = new Person("David", "University of Obfuscation", "🤪");

examplePerson.sayHello();

JAVASCRIPT

Post-Obfuscation Code

/**
 * "stringEscapeObfuscated.js"
 * This is the resulting code after obfuscation.
 *
*/

class Person {
  constructor(name, school, emoji) {
    this.name = name;
    this.school = school;
    this.favEmoji = emoji;
  }
  sayHello() {
    let helloStatement =
      "\x48\x65\x6c\x6c\x6f\x2c\x20\x6d\x79\x20\x6e\x61\x6d\x65\x20\x69\x73\x20" + // Hexadecimal Escape Sequence
      this["\x6e\x61\x6d\x65"] + // Hexadecimal encoding of member expression property
      "\u002e\u0020\u0049\u0020\u0067\u006f\u0020\u0074\u006f\u0020" + // Unicode Escape Sequence
      this["\u0073\u0063\u0068\u006f\u006f\u006c"] + // Unicode encoding of member expression property
      "\x20\x61\x6e\x64\x20\x6d\x79\x20\x66\x61\x76\x6f\x75\x72\x69\x74\x65\u0020\u0065\u006d\u006f\u006a\u0069\u0020\u0069\u0073\u0020" + // Hexadecimal and Unicode Mix Escape Sequence
      this["\x66\x61\x76\u0045\u006d\u006f\u006a\u0069"]; // Hexadecimal and Unicode encoding of member expression property
    console.log(helloStatement);
  }
}

// Every constructor argument below is a string written as an escape sequence.
const examplePerson = new Person(
  "\u0044\u0061\u0076\u0069\u0064", // Unicode Escape Sequence
  "\x55\x6e\x69\x76\x65\x72\x73\x69\x74\x79\x20\x6f\x66\x20\x4f\x62\x66\x75\x73\x63\x61\x74\x69\x6f\x6e", // Hexadecimal Escape Sequence
  "\u{1F92A}" // Curly Bracket Unicode Escape Sequence
);

examplePerson.sayHello();

JAVASCRIPT

Analysis Methodology

Despite appearing daunting at first glance, this obfuscation technique is relatively trivial to reverse. To begin, let’s copy and paste the obfuscated sample into AST Explorer

正在上传…重新上传取消

View of the obfuscated code in AST Explorer

Our targets of interest here are the obfuscated strings, which are of type StringLiteral. Let’s take a closer look at one of these nodes:

正在上传…重新上传取消

A closer look at one of the obfuscated StringLiteral nodes

We can deduce two things from analyzing the structure of these nodes:

The actual, unobfuscated value has been parsed by Babel and is stored in the value property.
All nodes containing escaped text sequences have a property, extra, which stores the actual value and the encoded text in the extra.rawValue and extra.raw properties, respectively.

Since the parsed value is already stored in the value property, we can safely delete the extra property, causing Babel to default to the value property when generating the code and thereby restoring the original strings. To do this, we create a visitor that iterates through all StringLiteral nodes to delete the extra property if it exists. After that, we can generate code from the resulting AST to get the deobfuscated result. The Babel implementation is shown below:

Babel Deobfuscation Script

/**
 * Deobfuscator.js
 * The babel script used to deobfuscate the target file
 *
*/
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const t = require("@babel/types");
const generate = require("@babel/generator").default;
const beautify = require("js-beautify");
const { readFileSync, writeFile } = require("fs");

/**
 * Entry point of the deobfuscator: removes the escape-sequence encoding from
 * the string literals of the given source, beautifies the regenerated code
 * and hands it to writeCodeToFile.
 * @param source The source code of the file to be deobfuscated
 *
 */
function deobfuscate(source) {
  // Build the AST of the obfuscated source
  const tree = parser.parse(source);

  /**
   * Visitor that strips the `extra` property from string literals, so the
   * generator falls back to the already-decoded `value` property.
   */
  const stripEncodingVisitor = {
    StringLiteral(path) {
      if (!path.node.extra) {
        return; // Nothing to remove on this literal
      }
      delete path.node.extra;
    },
  };

  // Run the visitor over the whole tree
  traverse(tree, stripEncodingVisitor);

  // Regenerate the code (without comments) and beautify it
  const generated = generate(tree, { comments: false }).code;
  const beautifyOptions = {
    indent_size: 2,
    space_in_empty_paren: true,
  };
  const prettyCode = beautify(generated, beautifyOptions);

  // Persist the deobfuscated result
  writeCodeToFile(prettyCode);
}
/**
 * Saves the deobfuscated code to output.js and reports the outcome on the console.
 * @param code The deobfuscated code
 */
function writeCodeToFile(code) {
  const outputPath = "output.js";

  // Completion callback: `err` is set only when the write failed.
  const reportResult = (err) => {
    if (!err) {
      console.log(`Wrote file to ${outputPath}`);
      return;
    }
    console.log("Error writing file", err);
  };

  writeFile(outputPath, code, reportResult);
}

deobfuscate(readFileSync("./stringEscapeObfuscated.js", "utf8"));

JAVASCRIPT

After processing the obfuscated script with the babel plugin above, we get the following result:

Post-Deobfuscation Result

class Person {
  constructor(name, school, emoji) {
    this.name = name;
    this.school = school;
    this.favEmoji = emoji;
  }

  sayHello() {
    let helloStatement = "Hello, my name is " + this["name"] + ". I go to " + this["school"] + " and my favourite emoji is " + this["favEmoji"];
    console.log(helloStatement);
  }

}

const examplePerson = new Person("David", "University of Obfuscation", "\uD83E\uDD2A"); // Babel won't generate the actual representation of non-ascii characters
examplePerson.sayHello();

JAVASCRIPT

The strings are now deobfuscated, and the code becomes much easier to read.

Example #2: String-Array Map Obfuscation

This type of obfuscation removes references to string literals and places them in a special array. Whenever a value must be accessed, the obfuscated script will reference the original string’s position in the string array. This technique is often combined with the previously discussed technique of storing strings as hexadecimal/unicode escape sequences. To isolate the point in this example, I’ve chosen not to include additional encoding. Below is an example of this obfuscation technique in practice:

Original Source Code

/**
 * "Input.js"
 * Original, unobfuscated code.
 *
*/

class Person {
  constructor(name, school, animal) {
    this.name = name;
    this.school = school;
    this.favAnimal = animal;
  }
  sayHello() {
    let helloStatement =
      "Hello, my name is " +
      this.name +
      ". I go to " +
      this.school +
      " and my favourite animal is a " +
      this.favAnimal;
    console.log(helloStatement);
  }
}

const examplePerson = new Person("David", "University of Obfuscation", "Penguin");

examplePerson.sayHello();

JAVASCRIPT

Post-Obfuscation Code

/**
 * "stringArrayObfuscated.js"
 * This is the resulting code after obfuscation.
 *
*/

// This is the string array lookup table.
var _0xcd45 = [
  "name",
  "school",
  "favAnimal",
  "Hello, my name is ",
  ". I go to ",
  " and my favourite animal is a ",
  "log",
  "David",
  "University of Obfuscation",
  "Penguin",
  "sayHello",
];
class Person {
  constructor(name, school, animal) {
    // Member expression properties obfuscated using this technique
    this[_0xcd45[0]] = name;
    this[_0xcd45[1]] = school;
    this[_0xcd45[2]] = animal;
  }
  sayHello() {
    let helloStatement =
      _0xcd45[3] +
      this[_0xcd45[0]] +
      _0xcd45[4] +
      this[_0xcd45[1]] +
      _0xcd45[5] +
      this[_0xcd45[2]];
    console[_0xcd45[6]](helloStatement);
  }
}
const examplePerson = new Person(_0xcd45[7], _0xcd45[8], _0xcd45[9]);// Obfuscation of string arguments using this technique
examplePerson[_0xcd45[10]](); // Member expression property obfuscated using this technique

JAVASCRIPT

Analysis Methodology

Similar to the first example, this obfuscation technique is mostly for show and very trivial to undo. To begin, let’s copy and paste the obfuscated sample into AST Explorer.

正在上传…重新上传取消

View of the obfuscated code in AST Explorer

Our targets of interest here are the master array, _0xcd45 and its references. These references to it are of type MemberExpression. Let’s take a closer look at one of the MemberExpression nodes of interest.

正在上传…重新上传取消

A closer look at one of the obfuscated MemberExpression nodes

We can notice that, unlike the first example, babel does not compute the actual value of these member expressions for us. However, it does store the name of the array they are referencing and the position of the array to be accessed.

Let’s now expand the VariableDeclaration node that holds the string array.

正在上传…重新上传取消

A closer look at the Variable Declaration node for the _0xcd45 array

We can observe that the name of the string array, _0xcd45, is held in path.node.declarations[0].id.name. We can also see that path.node.declarations[0].init.elements is an array of nodes, which holds each node of the string literals declared in the string array. Finally, the string array is the first VariableDeclaration with an init value of type ArrayExpression encountered at the top of the file.

[Note: Traditionally, javascript obfuscators put the string arrays at the top of the file/code block. However, this is not always the case (e.g. other string-containing arrays are declared first, or the string array is reassigned). You may need to make a slight modification to this step in that case.]

Using those observations, we can come up with the following logic to restore the code:

Traverse the ast to search for the variable declaration of the string array. To check if it is the string array’s declaration, it must meet the following criteria:
1. The VariableDeclaration node must declare only ONE variable.
2. Its corresponding VariableDeclarator node must have an init property of type ArrayExpression
3. ALL of the elements of the ArrayExpression must be of type StringLiteral
After finding the declaration, we can:
1. Store the string array’s name in a variable, stringArrayName
2. Store a copy of all its elements in a variable, stringArrayElements
Find all references to the string array. One of the most powerful features of Babel is its support for scopes.

From the Babel Plugin Handbook:

References all belong to a particular scope; this relationship is known as a binding.

We’ll take advantage of this feature by doing the following:
1. To ensure that we are getting the references to the correct identifier, we will get the path of the id property and store it in a variable, idPath.
2. We will then get the binding of the string array, using idPath.scope.getBinding(stringArrayName) and store it in a variable, binding.
3. If the binding does not exist, we will skip this variable declarator by returning early.
4. The constant property of binding is a boolean determining if the variable is constant. If the value of constant is false (i.e, it is reassigned/modified), replacing the references will be unsafe. In that case, we will return early.
5. The referencePaths property of binding is an array containing every NodePath that references the string array. We’ll extract this to its own variable.
We will create a variable, shouldRemove, which will be a flag dictating whether or not we can remove the original VariableDeclaration. By default, we’ll initialize it to true. More on this in the next step.
We will loop through each individual referencePath of the referencePaths array, and check if they meet all the following criteria:
1. The parent NodePath of the current referencePath must be a MemberExpression. The reason we are checking the parent node is because the referencePath refers to the actual referenced identifier (in our example, _0xcd45), which would be contained in a MemberExpression parent node (such as _0xcd45[0])
2. The parent NodePath’s object field must be the current referencePath’s node (that is, it must be the string array’s identifier)
3. The parent NodePath’s computed field must be true. This means that bracket notation is being used for member access (ex. _0xcd45[0]).
4. The parent NodePath’s property field must be of type NumericLiteral, so we can use its value to access the corresponding node by index.
If all of these criteria are met, we can lookup the corresponding node in our stringArrayElements array using the value stored in the parent NodePath’s property field, and safely replace the referencePath‘s parent path with it (that is, replace the entire MemberExpression with the actual string).
If at least one of these conditions is not met for the current referencePath, we will be unable to replace the referencePath. In this case, removing the original VariableDeclaration of the string array would be unsafe, since these references to it would still be in the final code. Therefore, we should set our shouldRemove flag to false. We’ll then skip to the next iteration of the for loop.
After we have finished iterating over all the referencePaths, we will use the value of our shouldRemove flag to determine if it is safe to remove the original VariableDeclaration.

If shouldRemove still has the default value of true, that means all referencePaths have been successfully replaced, and the original declaration of the string array is no longer needed, so we can remove it.
If shouldRemove is equal to false, we encountered a referencePath that we could not replace. It is then unsafe to remove the original declaration of the string array, so we don’t remove it.

The Babel implementation is shown below:

Babel Deobfuscation Script

/**
 * Deobfuscator.js
 * The babel script used to deobfuscate the target file
 *
 */

const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const t = require("@babel/types");
const generate = require("@babel/generator").default;
const beautify = require("js-beautify");
const { readFileSync, writeFile } = require("fs");

/**
 * Main function to deobfuscate the code.
 *
 * Looks for declarations of the form `var name = ["a", "b", ...]`, replaces
 * every indexed read of that array (e.g. `name[3]`) with the string literal it
 * refers to, and removes the declaration when no reference to it is left.
 * The regenerated, beautified code is passed to writeCodeToFile().
 *
 * @param source The source code of the file to be deobfuscated
 *
 */
function deobfuscate(source) {
  /**
   * Checks whether a MemberExpression is written to instead of read,
   * i.e. `arr[0] = x`, `arr[0] += x`, `arr[0]++` or `delete arr[0]`.
   * @param memberPath NodePath of the MemberExpression to inspect
   */
  const isWriteTarget = (memberPath) => {
    const { node, parentPath } = memberPath;
    return (
      parentPath.isAssignmentExpression({ left: node }) ||
      parentPath.isUpdateExpression({ argument: node }) ||
      parentPath.isUnaryExpression({ operator: "delete", argument: node })
    );
  };

  /**
   * Visitor for inlining the contents of the string array.
   */
  const deobfuscateStringArrayVisitor = {
    VariableDeclaration(path) {
      const { declarations } = path.node;
      if (
        // The VariableDeclaration node must declare only ONE variable.
        declarations.length !== 1 ||
        // Its corresponding VariableDeclarator node must have an init property of type ArrayExpression
        !t.isArrayExpression(declarations[0].init)
      )
        return; //skip

      // Store a copy of all its elements in a variable
      const stringArrayElements = [...declarations[0].init.elements];
      // ALL of the elements of the ArrayExpression must be of type StringLiteral
      if (!stringArrayElements.every((elementNode) => t.isStringLiteral(elementNode))) return;

      // Store the string array's name in a variable
      const stringArrayName = declarations[0].id.name;
      // Get the path of the identifier. By using this path, we ensure we will ALWAYS correctly refer to the scope of the array
      const idPath = path.get("declarations.0.id");
      // Get the binding of the array.
      const binding = idPath.scope.getBinding(stringArrayName);

      if (!binding) return;

      const { constant, referencePaths } = binding;

      // This wouldn't be safe if the array was not constant.
      if (!constant) return;
      // This decides if we can remove the array or not.
      // If there are any references to the array that cannot be replaced, it is unsafe to remove the original VariableDeclaration.
      let shouldRemove = true;
      // [MemberExpression path, StringLiteral node] pairs. Replacements are only applied
      // after every reference has been inspected, so that we can still back out untouched.
      const replacements = [];

      for (const referencePath of referencePaths) {
        const { parentPath: refParentPath } = referencePath;
        const { object, computed, property } = refParentPath.node;

        // The refParent must be of type MemberExpression, and its "object" field
        // must be the reference to the array (the original referencePath)
        const isMemberOfArray =
          t.isMemberExpression(refParentPath.node) && object === referencePath.node;

        // `constant` only tracks reassignment of the variable itself. If the script writes
        // THROUGH the array, its contents are not constant either, and replacing the write
        // target would even generate invalid code such as `"name" = x`. Leave everything as is.
        if (isMemberOfArray && isWriteTarget(refParentPath)) return;

        // The "computed" field must be true (indicating use of bracket notation) and the
        // "property" field must be a numeric literal, so we can access the corresponding
        // element of the array by index. An index that does not hit an element yields undefined.
        const replacement =
          isMemberOfArray && computed === true && t.isNumericLiteral(property)
            ? stringArrayElements[property.value]
            : undefined;

        if (!replacement) {
          // We've run into a reference that can't be replaced.
          // Therefore, it'd be unsafe to remove the original variable declaration, since it will still be referenced after our transformation has completed.
          shouldRemove = false;
          continue;
        }

        replacements.push([refParentPath, replacement]);
      }

      for (const [refParentPath, replacement] of replacements) {
        // Replace the MemberExpression with its actual value. Every insertion gets its own
        // clone so that one node object never lives at two places in the AST.
        refParentPath.replaceWith(t.cloneNode(replacement));
      }

      if (shouldRemove) path.remove();
    },
  };

  //Parse AST of Source Code
  const ast = parser.parse(source);

  // Execute the visitor
  traverse(ast, deobfuscateStringArrayVisitor);

  // Code Beautification
  let deobfCode = generate(ast, { comments: false }).code;
  deobfCode = beautify(deobfCode, {
    indent_size: 2,
    space_in_empty_paren: true,
  });
  // Output the deobfuscated result
  writeCodeToFile(deobfCode);
}
/**
 * Persists the deobfuscated code to output.js and logs the outcome
 * @param code The deobfuscated code
 */
function writeCodeToFile(code) {
  const outputPath = "output.js";

  // Invoked by writeFile once the write has finished (or failed)
  const reportOutcome = (err) => {
    if (!err) {
      console.log(`Wrote file to ${outputPath}`);
      return;
    }
    console.log("Error writing file", err);
  };

  writeFile(outputPath, code, reportOutcome);
}

// Entry point: read the obfuscated sample from disk and run the deobfuscator on it.
deobfuscate(readFileSync("./stringArrayObfuscated.js", "utf8"));

JAVASCRIPT

After processing the obfuscated script with the babel plugin above, we get the following result:

Post-Deobfuscation Result

class Person {
  constructor(name, school, animal) {
    this["name"] = name;
    this["school"] = school;
    this["favAnimal"] = animal;
  }

  sayHello() {
    let helloStatement = "Hello, my name is " + this["name"] + ". I go to " + this["school"] + " and my favourite animal is a " + this["favAnimal"];
    console["log"](helloStatement);
  }

}

const examplePerson = new Person("David", "University of Obfuscation", "Penguin");
examplePerson["sayHello"]();

JAVASCRIPT

The strings are now deobfuscated, and the code becomes much easier to read.

Example #3: String Concatenation

This type of obfuscation, in its most basic form, takes a string such as the following:

1	`let myString = "Hello World";` JAVASCRIPT

And splits it into multiple parts:

1	`let myString = "He" + "l" + "l" + "o W" + "o" + "rl" + "d"; // => Hello World` JAVASCRIPT

You might be thinking, “Hey, the obfuscated version doesn’t look that bad”, and you’d be right. However, keep in mind that a file will typically have a lot more obfuscation layered on top. An example using the techniques already covered above could look something like this (or likely more advanced):

var _0xba8a = ["\x48\x65", "\x6C", "\x6F\x20\x57", "\x6F", "\x72\x6C", "\x64"]; //Encoded string array
let myString =
  _0xba8a[0] +
  _0xba8a[1] +
  _0xba8a[1] +
  _0xba8a[2] +
  _0xba8a[3] +
  _0xba8a[4] +
  _0xba8a[5]; // string concatenation

JAVASCRIPT

The following analysis will only cover the most basic case from the first example I showed you. Traditionally, a file’s obfuscation layers are peeled back one at a time. Your goal as a reverse engineer would be to make transformations to the code such that it looks like the basic case and only then apply this analysis.

Original Source Code

/**
 * "Input.js"
 * Original, unobfuscated code.
 *
*/

class Person {
  constructor(name, school, animal) {
    this.name = name;
    this.school = school;
    this.favAnimal = animal;
  }
  sayHello() {
    let helloStatement =
      "Hello, my name is " +
      this.name +
      ". I go to " +
      this.school +
      " and my favourite animal is a " +
      this.favAnimal;
    console.log(helloStatement);
  }
}

const examplePerson = new Person("David", "University of Obfuscation", "DOGGO");

examplePerson.sayHello();

JAVASCRIPT

Post-Obfuscation Code

/**
 * "stringConcatenationObfuscated.js"
 * This is the resulting code after obfuscation.
 *
*/

class Person {
  constructor(name, school, emoji) {
    this.name = name;
    this.school = school;
    this.favAnimal = emoji;
  }
  sayHello() {
    let helloStatement =
      "Hello, my name is " +
      this.name +
      ". I g" + "o t"+ "o " +
      this.school +
      " an" + "d "+ "m"+"y"+ " fa"+"vo"+"ur"+"ite" +" ani"+"ma"+"l" +" is"  + " a "+
      this.favAnimal;
    console.log(helloStatement);
  }
}

const examplePerson = new Person("D"+"a"+"vi"+"d", "Un"+"ive"+"rsi"+"ty"+ " o"+"f " + "Ob"+"fus"+"cat"+"ion", "D"+"O"+"G"+"G"+"O");

examplePerson.sayHello();

JAVASCRIPT

Analysis Methodology

Let’s paste our obfuscated code into AST Explorer.

正在上传…重新上传取消

View of the obfuscated code in AST Explorer

Our targets of interest here are all of the strings being concatenated. Let’s click on one of them to take a closer look at one of the nodes of interest.

正在上传…重新上传取消

A closer look at one of the nodes of interest

We can make the following observations from the AST structure:

We can see that each individual substring is of type StringLiteral.
More importantly, the string literals seem to be contained in multiple nested BinaryExpressions.

So how could we go about solving this?

There are a few ways to do this. One would be to work up recursively from the right-most StringLiteral node in the binary expression and manually concatenate the string at each step. However, there’s a much simpler way to accomplish the same thing using Babel’s inbuilt path.evaluate() function. The steps for coding the deobfuscator are included below:

Traverse through the AST to search for BinaryExpressions
If a BinaryExpression is encountered, try to evaluate it using path.evaluate().
If path.evaluate returns confident:true, check if the evaluated value is a string. If either condition is false, return.
Replace the BinaryExpression node with the computed value as a StringLiteral, stored in value.

The babel implementation is shown below:

Babel Deobfuscation Script

/**
 * Deobfuscator.js
 * The babel script used to deobfuscate the target file
 *
 */
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const t = require("@babel/types");
const generate = require("@babel/generator").default;
const beautify = require("js-beautify");
const { readFileSync, writeFile } = require("fs");

/**
 * Main function to deobfuscate the code.
 *
 * Folds every BinaryExpression that Babel can confidently evaluate to a string
 * into a single StringLiteral, then beautifies the regenerated code and passes
 * it to writeCodeToFile().
 *
 * @param source The source code of the file to be deobfuscated
 *
 */
function deobfuscate(source) {
  //Parse AST of Source Code
  const ast = parser.parse(source);

  // Execute the visitor
  traverse(ast, {
    BinaryExpression(path) {
      // Let Babel try to compute the result of the expression
      const { confident, value } = path.evaluate();
      // Only a confidently computed string may take the place of the expression
      if (confident && typeof value === "string") {
        path.replaceWith(t.stringLiteral(value));
      }
    },
  });

  // Code Beautification
  const generatedCode = generate(ast, { comments: false }).code;
  const deobfCode = beautify(generatedCode, {
    indent_size: 2,
    space_in_empty_paren: true,
  });

  // Output the deobfuscated result
  writeCodeToFile(deobfCode);
}
/**
 * Persists the deobfuscated code to output.js and logs the outcome
 * @param code The deobfuscated code
 */
function writeCodeToFile(code) {
  const outputPath = "output.js";

  // Invoked by writeFile once the write has finished (or failed)
  const reportOutcome = (err) => {
    if (!err) {
      console.log(`Wrote file to ${outputPath}`);
      return;
    }
    console.log("Error writing file", err);
  };

  writeFile(outputPath, code, reportOutcome);
}

// Entry point: read the obfuscated sample from disk and run the deobfuscator on it.
deobfuscate(readFileSync("./stringConcatenationObfuscated.js", "utf8"));

JAVASCRIPT

After processing the obfuscated script with the babel plugin above, we get the following result:

Post-Deobfuscation Result

class Person {
  constructor(name, school, emoji) {
    this.name = name;
    this.school = school;
    this.favAnimal = emoji;
  }

  sayHello() {
    let helloStatement = "Hello, my name is " + this.name + ". I g" + "o t" + "o " + this.school + " an" + "d " + "m" + "y" + " fa" + "vo" + "ur" + "ite" + " ani" + "ma" + "l" + " is" + " a " + this.favAnimal;
    console.log(helloStatement);
  }

}

const examplePerson = new Person("David", "University of Obfuscation", "DOGGO");
examplePerson.sayHello();

JAVASCRIPT

But hold on, that looks only partly deobfuscated!

A Minor Complication

Okay, I may have lied to you a bit. The example I gave you actually contains two cases. The simplest case with ONLY string literals:

const examplePerson = new Person(
  "D" + "a" + "vi" + "d",
  "Un" + "ive" + "rsi" + "ty" + " o" + "f " + "Ob" + "fus" + "cat" + "ion",
  "D" + "O" + "G" + "G" + "O"
);

JAVASCRIPT

And the bit more advanced case, where string literals are mixed with non-string literals (in this case, variables):

let helloStatement =
  "Hello, my name is " +
  this.name +
  ". I g" +
  "o t" +
  "o " +
  this.school +
  " an" +
  "d " +
  "m" +
  "y" +
  " fa" +
  "vo" +
  "ur" +
  "ite" +
  " ani" +
  "ma" +
  "l" +
  " is" +
  " a " +
  this.favAnimal;

JAVASCRIPT

The above algorithm will not work for the second case as is. However, there’s a simple remedy. Simply edit the obfuscated file to wrap consecutive strings in brackets like so:

let helloStatement =
  "Hello, my name is " +
  this.name +
  (". I g" + "o t" + "o ") +
  this.school +
  (" an" +
    "d " +
    "m" +
    "y" +
    " fa" +
    "vo" +
    "ur" +
    "ite" +
    " ani" +
    "ma" +
    "l" +
    " is" +
    " a ") +
  this.favAnimal;

JAVASCRIPT

And our deobfuscator will output our desired result:

let helloStatement =
  "Hello, my name is " +
  this.name +
  ". I go to " +
  this.school +
  " and my favourite animal is a " +
  this.favAnimal;

JAVASCRIPT

I’m sure some of you might be wondering why the algorithm doesn’t work without manually adding the brackets. This is outside of the scope of this article. However, if you’re interested in the reason for this intricacy and an algorithm that simplifies it without needing to manually add the brackets, check out my article about Constant Folding. But for now, I’ll move on to another example.

Example #4: String Encryption

First and foremost, string encryption IS NOT the same as encoding strings as hexadecimal or unicode. Whereas the javascript interpreter will automatically interpret "\x48\x65\x6c\x6c\x6f" as "Hello", encrypted strings must be passed through to a decryption function and evaluated before they become useful to the javascript engine (or representable as a StringLiteral by Babel).

For example, even though Base64 is a type of encoding, in the context of string concealing it falls under string encryption since console.log("SGVsbG8=") prints SGVsbG8=, but console.log(atob("SGVsbG8=")) prints Hello. In this example, atob() is the decoding function.

Most obfuscators will define custom functions for encrypting and decrypting strings. Sometimes, the string may need to go through multiple decryption functions. Therefore, there is no universal solution for deobfuscating string encryption. Most of the time, you’ll need to manually analyze the code to find the string decryption function, hard-code it into your deobfuscator, then evaluate it for each CallExpression that references it. The example below will cover a single example that uses an XOR cipher from this repository for obfuscating the strings.

Original Source Code

/**
 * "Input.js"
 * Original, unobfuscated code.
 *
*/

class Person {
  constructor(name, school, animal) {
    this.name = name;
    this.school = school;
    this.favAnimal = animal;
  }
  sayHello() {
    let helloStatement =
      "Hello, my name is " +
      this.name +
      ". I go to " +
      this.school +
      " and my favourite animal is a " +
      this.favAnimal;
    console.log(helloStatement);
  }
}

const examplePerson = new Person("David", "University of Obfuscation", "DOGGO");

examplePerson.sayHello();

JAVASCRIPT

Post-Obfuscation Code

/**
 * "stringEncryptionObfuscated.js"
 * This is the resulting code after obfuscation.
 *
*/

/**
 * The decryption function
 * A simple implementation of an XOR cipher.
 * @param _0xed68x1 The string to be decrypted
 * @param _0xed68x2 The decryption key
*/
function _0x2720d7(_0xed68x1, _0xed68x2) {
  var _0xed68x3 = "";
  if (!_0xed68x2) {
    _0xed68x2 = 6;
  }
  for (var _0xed68x4 = 0; _0xed68x4 < _0xed68x1["length"]; ++_0xed68x4) {
    _0xed68x3 += String["fromCharCode"](
      _0xed68x2 ^ _0xed68x1["charCodeAt"](_0xed68x4)
    );
  }
  return _0xed68x3;
}
class Person {
  constructor(name, school, animal) {
    this[_0x2720d7("댎댁댍댅", 438971164636e3)] = name;
    this[_0x2720d7("敷敧敬敫敫敨", 298471289414916)] = school;
    this[_0x2720d7("옞옙옎옹옖옑옕옙옔", 834789504173688)] = animal;
  }
  sayHello() {
    let helloStatement =
      _0x2720d7("ᵅᵨᵡᵡᵢᴡᴭᵠᵴᴭᵣᵬᵠᵨᴭᵤᵾᴭ", 12786957) +
      this[_0x2720d7("のちねづ", 468128861335552)] +
      _0x2720d7("຅຋໢຋໌ໄ຋ໟໄ຋", 88739499) +
      this[_0x2720d7("噥噵噾噹噹噺", 327790472222230)] +
      _0x2720d7(
        "汚氛气氞汚気氃汚氜氛氌氕氏氈氓氎氟汚氛气氓気氛氖汚氓氉汚氛汚",
        38694010
      ) +
      this[_0x2720d7("녠녧녰녇녨녯녫녧녪", 148377547550982)];
    console[_0x2720d7("㐠㐣㐫", 21889598764108)](helloStatement);
  }
}
const examplePerson = new Person(
  _0x2720d7("幕幰幧幸幵", 33775121),
  _0x2720d7("﹪﹑﹖﹉﹚﹍﹌﹖﹋﹆︟﹐﹙︟ﹰ﹝﹙﹊﹌﹜﹞﹋﹖﹐﹑", 46595647),
  _0x2720d7("Ⳑⳛⳓⳓⳛ", 85339284)
);
examplePerson[_0x2720d7("릪릸릠릑림릵릵릶", 803843901012441)]();

JAVASCRIPT

Analysis Methodology

Let’s paste our obfuscated code into AST Explorer.

正在上传…重新上传取消

View of the obfuscated code in AST Explorer

Our targets of interest here are the cryptic calls to the _0x2720d7 function. Let’s take a closer look at one of them.

正在上传…重新上传取消

A closer look at one of the nodes of interest

We can observe that the nodes of interest are of type CallExpression. Each call expression takes in two parameters. The first is a StringLiteral which holds the encrypted string. The second is a NumericLiteral, which is used as a decryption key.

There are two ways we can deobfuscate this script, the second of which I personally prefer since it looks cleaner.

Method #1: The Copy-Paste Technique

The first method involves the following steps:

Find the decryption function in the obfuscated script
Paste the decryption function, _0x2720d7, in our deobfuscator
Traverse the ast in search for the FunctionDeclaration of the decryption function (in this case, _0x2720d7). Once found, remove the path as it is no longer necessary
Traverse the ast in search of CallExpressions where the callee is the decryption function (in this case, _0x2720d7). Once found:
1. Assign each argument of path.node.arguments to a variable, e.g. stringToDecrypt and decryptionKey respectively.
2. Create a variable, result
3. Evaluate _0x2720d7(stringToDecrypt,decryptionKey) and assign the resulting value to result
4. Replace the CallExpression path with the actual value: path.replaceWith(t.valueToNode(result))

One of the reasons I don’t like to use this method is that the code for the deobfuscator can become quite long and messy if:

The decryption function contains many lines of code, or
There are many parameters to parse from the CallExpression

A cleaner approach in my opinion is the next method, which evaluates the decryption function and its calls in a virtual machine.

Method #2: Using the NodeJS VM module

Whenever possible, I prefer to use this method because of its cleanliness. Why? Well,

It doesn’t require me to copy-paste the entire encryption function into my deobfuscator
I don’t need to manually parse any of the arguments of CallExpressions before execution.

The only downside is that it requires two separate visitors and therefore two traversals, whereas you can probably implement the first method in a single traversal.

Here are the steps to implement it:

Create a variable, decryptFuncCtx, and assign it an empty context using vm.createContext()
Traverse the ast in search for the FunctionDeclaration of the decryption function (in this case, _0x2720d7). Once found:
1. Use @babel/generator to generate the function’s source code from the node and assign it to a variable, decryptFuncCode
2. Add the decryption function to the VM’s context using vm.runInContext(decryptFuncCode, decryptFuncCtx)
3. Delete the FunctionDeclaration node with path.remove() as it’s now useless, and stop traversing with path.stop()
Traverse the ast in search of CallExpressions where the callee is the decryption function (in this case, _0x2720d7). Once found:
1. Use @babel/generator to generate the CallExpression’s source code from the node and assign it to a variable, expressionCode
2. Evaluate the function call in the context of decryptFuncCtx using vm.runInContext(expressionCode,decryptFuncCtx).
3. Optionally assign the result to a variable, value
4. Replace the CallExpression node with the computed value to restore the unobfuscated string literal.

Note: for both of these methods you should probably come up with a dynamic way to detect the decryption function (by analyzing the structure of the function node or # of calls) in case the script is morphing. You should also pay mind to the scope of function and also check if it’s ever redefined later in the script. But for this example, I will neglect that and just hardcode the name for simplicity.

The babel implementation for the second method is shown below:

Babel Deobfuscation Script

/**
 * Deobfuscator.js
 * The babel script used to deobfuscate the target file
 *
 */
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const t = require("@babel/types");
const generate = require("@babel/generator").default;
const beautify = require("js-beautify");
const { readFileSync, writeFile } = require("fs");
const vm = require("vm");
const { create } = require("chrome-remote-interface-extra/lib/page/Page");
/**
 * Main function to deobfuscate the code.
 *
 * Loads the decryption function of the obfuscated script into a NodeJS VM
 * context, evaluates every call to it inside that context, and replaces each
 * call with the value it returned. The regenerated, beautified code is passed
 * to writeCodeToFile().
 *
 * SECURITY NOTE: this EXECUTES code taken from the file being analyzed. The
 * NodeJS `vm` module is not a security mechanism, so only run this on samples
 * you would be willing to execute, or inside an isolated environment.
 *
 * @param source The source code of the file to be deobfuscated
 *
 */
function deobfuscate(source) {
  // Hard-coded decryption function name for simplification
  const decryptFuncName = "_0x2720d7";
  // Upper bound (in milliseconds) for every piece of code run in the VM, so that a
  // decryption routine that never returns cannot hang the deobfuscator.
  const vmOptions = { timeout: 1000 };

  //Parse AST of Source Code
  const ast = parser.parse(source);

  const decryptFuncCtx = vm.createContext();
  // Visitor for populating the VM context
  const createDecryptFuncCtxVisitor = {
    FunctionDeclaration(path) {
      const node = path.node;
      if (!t.isIdentifier(node.id, { name: decryptFuncName })) return;

      const decryptFuncCode = generate(node).code; // Generate the code to execute in context
      vm.runInContext(decryptFuncCode, decryptFuncCtx, vmOptions); // Execute the decryption function declaration in VM context
      path.remove(); // Remove the decryption function since it has served its use
      path.stop(); // stop traversing once the decryption function has been added to the context
    },
  };

  // Visitor for decrypting the string
  const deobfuscateEncryptedStringsVisitor = {
    CallExpression(path) {
      const node = path.node;
      // Only plain `_0x2720d7(...)` calls are of interest
      if (!t.isIdentifier(node.callee, { name: decryptFuncName })) return;

      const expressionCode = generate(node).code; // Convert the CallExpression to code
      const value = vm.runInContext(expressionCode, decryptFuncCtx, vmOptions); // Evaluate the code
      path.replaceWith(t.valueToNode(value)); // Replace the node with the resulting value.
    },
  };
  // Create the context
  traverse(ast, createDecryptFuncCtxVisitor);
  // Decrypt all strings
  traverse(ast, deobfuscateEncryptedStringsVisitor);

  // Code Beautification
  let deobfCode = generate(ast, { comments: false }).code;
  deobfCode = beautify(deobfCode, {
    indent_size: 2,
    space_in_empty_paren: true,
  });
  // Output the deobfuscated result
  writeCodeToFile(deobfCode);
}
/**
 * Persists the deobfuscated code to output.js and logs the outcome
 * @param code The deobfuscated code
 */
function writeCodeToFile(code) {
  const outputPath = "output.js";

  // Invoked by writeFile once the write has finished (or failed)
  const reportOutcome = (err) => {
    if (!err) {
      console.log(`Wrote file to ${outputPath}`);
      return;
    }
    console.log("Error writing file", err);
  };

  writeFile(outputPath, code, reportOutcome);
}

// Entry point: read the obfuscated sample from disk and run the deobfuscator on it.
deobfuscate(readFileSync("./stringEncryptionObfuscated.js", "utf8"));

JAVASCRIPT

After processing the obfuscated script with the babel plugin above, we get the following result:

Post-Deobfuscation Result

class Person {
  constructor(name, school, animal) {
    this["name"] = name;
    this["school"] = school;
    this["favAnimal"] = animal;
  }

  sayHello() {
    let helloStatement = "Hello, my name is " + this["name"] + ". I go to " + this["school"] + " and my favourite animal is a " + this["favAnimal"];
    console["log"](helloStatement);
  }

}

const examplePerson = new Person("David", "University of Obfuscation", "DOGGO");
examplePerson["sayHello"]();

JAVASCRIPT

The strings are now deobfuscated, and the code becomes much easier to read.

Conclusion

Phew, that was quite the long segment! That about sums up the majority of string concealing techniques you’ll find in the wild and how to reverse them.

Before I go, I want to address one thing (as a bonus of sorts):

After deobfuscating the strings, we can see that they’re restored to:

1
2
3

this["name"] = name;
this["school"] = school;
this["favAnimal"] = animal;

JAVASCRIPT

But someone familiar with Javascript knows that the convention is to write it like this:

1
2
3

this.name = name;
this.school = school;
this.favAnimal = animal;

JAVASCRIPT

The good news is, you can also use Babel to restore the traditional dot operator formatting in MemberExpressions. Read my article about it here!

If you’re interested, you can find the source code for all the examples in this repository.

I hope that this article helped you learn something new. Thanks for reading, and happy reversing!

---------------------------------------------------------------------------------------------------------------------------------

Preface

This article assumes a preliminary understanding of Abstract Syntax Tree structure and BabelJS. Click Here to read my introductory article on the usage of Babel.

I’ll be honest, this transformation is less “deobfuscation” than it is “making our script look slightly prettier”. Even so, I think that it’s worth mentioning since it’s still an interesting example of how to manipulate an abstract syntax tree with babel. So, without further ado, let’s get into it!

Bracket Notation vs. Dot Notation

Let’s say we have the following piece of Javascript:

let foo = [1, 2, 3, 4, 5, 6, 7, 8, 9];
let bar = window["navigator"]["userAgent"];
let baz = Date["now"]();
let qux = foo[3];

JAVASCRIPT

The above code is an example of using bracket notation to access the properties of an object.
If you’re familiar with Javascript, you probably know that the code above can also be written like this:

// Same statements as the bracket-notation snippet; only the property accessors differ.
let foo = [1, 2, 3, 4, 5, 6, 7, 8, 9];
let bar = window.navigator.userAgent; // dot notation instead of ["navigator"]["userAgent"]
let baz = Date.now(); // dot notation instead of ["now"]
let qux = foo[3]; // a numeric index cannot be written with dot notation

JAVASCRIPT

The second snippet looks much cleaner because it isn’t littered with brackets and quotation marks. That’s why most programmers prefer the second snippet’s formatting over that of the first (unless, of course, you’re a psychopath).

So, how can we go about automatically transforming the first snippet to look like the second snippet? Let’s walk through the process step by step together.

Analysis Methodology

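Let’s start by pasting both snippets into AST Explorer to see the differences. First, we’ll investigate the first snippet. Our targets of interest are the second and third lines of the script (the bar and baz declarations), since those are the lines that access properties using bracket notation with string keys: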

[Screenshot not included in this copy of the article]

A closer look at one of the nodes of interest; first code snippet

So, we can see that our nodes of interest are of type MemberExpression. Each node has 3 important properties:

path.node.object, which stores the object being accessed (in this case, the Date object)
path.node.property, which stores the property to access (in this case, it’s the now property, and it is of type StringLiteral)
path.node.computed, which tells us whether the MemberExpression is computed (i.e., a value of true means it uses bracket notation) or not (i.e., a value of false means it uses dot notation). In this case, the computed property is set to true since bracket notation is being used.

Now, let’s analyze the same node on AST explorer, but for the second code snippet:

[Screenshot not included in this copy of the article]

A closer look at one of the nodes of interest; second code snippet

After looking at it, we can observe that they’re nearly identical. But there are two important differences in the second case:

path.node.property is of type Identifier, NOT StringLiteral.
path.node.computed is set to false, NOT true.

This makes sense, since path.node.computed being false means that dot notation will be used. The property must then be of type Identifier, because you can only access properties via the dot operator with a valid identifier name.

So, using those observations, we can come up with the following logic to write a babel plugin to restore the code:

Traverse the AST for MemberExpressions
Once a MemberExpression is found:
1. Verify that the path.node.computed property is set to true. If it isn’t, skip that node by returning.
2. Verify that path.node.property is of type StringLiteral. If it isn’t, skip that node by returning.
3. Verify that the value of path.node.property meets all the requirements of a valid identifier. This can be done by testing it against the regex for a valid identifier, taken from this Stack Overflow answer
4. Use path.replaceWith() to replace the MemberExpression with a new one, where:
  - The object is still path.node.object
  - path.node.property is an Identifier node, with the name set to the value stored in path.node.property.value
  - path.node.computed is equal to false

The babel implementation is shown below:

Babel Deobfuscation Script

/**
 * Deobfuscator.js
 * The babel script used to deobfuscate the target file
 *
 */
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const t = require("@babel/types");
const generate = require("@babel/generator").default;
const beautify = require("js-beautify");
const { readFileSync, writeFile } = require("fs");

/**
 * Main function to deobfuscate the code.
 * Parses the source into an AST, rewrites every computed member access whose
 * key is a string literal naming a valid identifier (obj["prop"]) into dot
 * notation (obj.prop), regenerates and beautifies the code, then hands the
 * result to writeCodeToFile.
 * @param source The source code of the file to be deobfuscated
 *
 */
function deobfuscate(source) {
  //Parse AST of Source Code
  const ast = parser.parse(source);

  const bracketToDotVisitor = {
    MemberExpression(path) {
      const { object, property, computed } = path.node;
      if (!computed) return; // Skip nodes that already use dot notation (computed must be true to continue)
      if (!t.isStringLiteral(property)) return; // Verify property is a string literal
      // Verify that the property being accessed is a valid identifier.
      // Babel's own helper is used instead of a hand-maintained identifier regex;
      // it also refuses keywords and reserved words.
      if (!t.isValidIdentifier(property.value)) return;

      // If conditions pass:

      // Replace the node with a new, non-computed MemberExpression
      path.replaceWith(
        t.memberExpression(object, t.identifier(property.value), false)
      );
    },
  };

  // Execute the visitor
  traverse(ast, bracketToDotVisitor);

  // Code Beautification
  let deobfCode = generate(ast, { comments: false }).code;
  deobfCode = beautify(deobfCode, {
    indent_size: 2,
    space_in_empty_paren: true,
  });
  // Output the deobfuscated result
  writeCodeToFile(deobfCode);
}
/**
 * Saves the deobfuscated code as output.js and logs the outcome.
 * @param code The deobfuscated code
 */
function writeCodeToFile(code) {
  const outputPath = "output.js";

  // Invoked by writeFile once the write has finished or failed
  const reportResult = (writeError) => {
    if (!writeError) {
      console.log(`Wrote file to ${outputPath}`);
      return;
    }
    console.log("Error writing file", writeError);
  };

  writeFile(outputPath, code, reportResult);
}

// Load the obfuscated input from disk and run it through the deobfuscator
const obfuscatedSource = readFileSync("./obfuscated.js", "utf8");
deobfuscate(obfuscatedSource);

JAVASCRIPT

After processing the obfuscated script with the babel plugin above, we get the following result:

Post-Deobfuscation Result

let foo = [1, 2, 3, 4, 5, 6, 7, 8, 9];
let bar = window.navigator.userAgent;
let baz = Date.now();
let qux = foo[3];

JAVASCRIPT

And the dot operator is restored!

Conclusion

So, today we discussed a simple but efficient way to restore the dot operator for MemberExpressions. After converting bracket notation to dot notation, it becomes a lot easier for us to differentiate between object member accessors and array element accessors. This is because of our StringLiteral check on `path.node.property`.

I’ll leave you with one useful piece of advice. You should probably run this plugin as a sort of “clean up transformer”, only after replacing all constant variables with their actual value. This is because if you don’t substitute in the actual value of a variable, this plugin won’t have any useful effect. For example, running the plugin on this code:

1 2	`let foo = "navigator"; let bar = window[foo];` JAVASCRIPT

Will give us:

1 2	`let foo = "navigator"; let bar = window[foo];` JAVASCRIPT

Which is the exact same thing. To restore it to let bar = window.navigator, we first must replace all references to the constant variable foo with its actual value, "navigator". If you want to learn how to do that, you can read my article on replacing constant variables with their actual value.

If you’re interested, you can find the source code for all the examples in this repository.

I hope this article helped you learn something new. Thanks for reading, and happy reversing!

--------------------------------------------------------------------------------------------------------------------------

Preface

This article assumes a preliminary understanding of Abstract Syntax Tree structure and BabelJS. Click Here to read my introductory article on the usage of Babel.

What is Constant Folding?

Constant Folding: “An optimization technique that eliminates expressions that calculate a value that can already be determined before code execution.” (Source)

To better explain constant folding, it’s perhaps more useful to first introduce the obfuscation technique that constant folding fights against. Take the following code for example:

Examples

Example #1: The Basic Case

/**
 * "Input.js"
 * Original, unobfuscated code.
 *
 */
let foo = 27;
let bar = 4;
let baz = "I am a string literal, totally whole!";

JAVASCRIPT

An obfuscator may split each constant value into multiple binary expressions, which could look something like this after obfuscation:

/**
 * "splitConstantsObfuscated.js"
 * This is the resulting code after obfuscation.
 * Every constant is now written as a chain of binary expressions.
 *
 */
let foo = 12373561 ^ (12373561 * 13 + 3 * 7) ^ 153794264;
let bar =
  (535 + false) ^
  (2318 + true * -1399 + true) ^
  (1321 - 1234 / 2340 + true + true * 50 + false) ^
  1232;
let baz =
  "I " +
  "a" +
  "m" +
  " a" +
  " st" +
  "ri" +
  "ng" +
  " lite" +
  "ra" +
  "l," +
  " tot" +
  "al" +
  "l" +
  "y" +
  " wh" +
  "ol" +
  "e" +
  "!";

JAVASCRIPT

As you can see, the obfuscation has transformed what used to be easy-to-read constants (27, 4, and "I am a string literal, totally whole!") into multiple expressions with arithmetic and bitwise operators. The code is even using mathematical operators on booleans! Someone reading the code would likely need to evaluate each expression in a debugger to figure out the value of each variable. Let’s paste the second snippet in the dev tools console to check:

[Screenshot not included in this copy of the article]

Checking the evaluated values in the DevTools console

We can observe that each variable in the second snippet evaluates to the same value as its counterpart in the first snippet. The way the javascript engine simplified the expressions down to a constant is the essence of Constant Folding.

Now, hypothetically, you could just evaluate each expression in a javascript interpreter and replace it by hand manually. And sure, you could do that in just a few seconds for the snippet I provided. But that isn’t a feasible solution if there were hundreds or even thousands of lines of code in a program similar to this. Thankfully for us, Babel has an inbuilt feature that can help us automate the simplification.

Analysis Methodology

Let’s start by pasting the obfuscated sample into AST Explorer

[Screenshot not included in this copy of the article]

View of the obfuscated code in AST Explorer

If we click on one of the expression chunks on the right-hand side of the assignment expressions, we can take a closer look at the AST structure:

[Screenshot not included in this copy of the article]

A closer look at one of the nodes of interest

We can see that in this case, the small chunk of the string is of type StringLiteral, and it’s contained inside a bunch of nested BinaryExpression nodes. If we look at any other fraction of the other expressions, we can observe two important commonalities

A constant value, or Literal (e.g. StringLiteral,NumericLiteral, or BooleanLiteral)
The Literal is contained inside a single or nested BinaryExpression(s).

Our final goal is to evaluate all the binary expressions to reduce each right-hand side expression to a constant Literal value. Based on the nested nature of the BinaryExpressions, you might be thinking of manually writing a recursive algorithm. However, there’s a much simpler way to accomplish the same effect using Babel’s inbuilt path.evaluate() function. Here’s how we’re going to use it:

Traverse through the AST to search for BinaryExpressions
If a BinaryExpression is encountered, try to evaluate it using path.evaluate().
Check if it returns confident:true. If confident is false, skip the node by returning.
Create a node from the value using t.valueToNode(value) to infer the type, and assign it to a new variable, valueNode
Check that the resulting valueNode is a Literal type. If the check returns false skip the node by returning.
- This will cover StringLiteral, NumericLiteral, BooleanLiteral etc. types and skip over others that would result from invalid operations (e.g. t.valueToNode(Infinity) is of type BinaryExpression, t.valueToNode(undefined) is of type Identifier)
Replace the BinaryExpression node with our newly created `valueNode`.
The babel implementation is shown below:

Babel Deobfuscation Script

/**
 * Deobfuscator.js
 * The babel script used to deobfuscate the target file
 *
 */
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const t = require("@babel/types");
const generate = require("@babel/generator").default;
const beautify = require("js-beautify");
const { readFileSync, writeFile } = require("fs");

/**
 * Entry point of the deobfuscator: folds constant binary expressions,
 * beautifies the regenerated code and passes it to writeCodeToFile.
 * @param source The source code of the file to be deobfuscated
 *
 */
function deobfuscate(source) {
  // Build the AST from the raw source text
  const ast = parser.parse(source);

  // Collapses every BinaryExpression that Babel can evaluate with confidence
  const foldConstantsVisitor = {
    BinaryExpression(path) {
      const evaluation = path.evaluate();
      if (evaluation.confident) {
        // Let Babel pick the node type that matches the computed value
        const replacement = t.valueToNode(evaluation.value);
        // Only literal results are substituted; anything else is left untouched
        if (t.isLiteral(replacement)) {
          path.replaceWith(replacement);
        }
      }
    },
  };

  // Run the folding pass over the whole tree
  traverse(ast, foldConstantsVisitor);

  // Regenerate the source without comments, then pretty-print it
  const generated = generate(ast, { comments: false });
  const beautifyOptions = {
    indent_size: 2,
    space_in_empty_paren: true,
  };
  const deobfCode = beautify(generated.code, beautifyOptions);

  // Persist the result
  writeCodeToFile(deobfCode);
}
/**
 * Writes the deobfuscated code to output.js and logs whether the write
 * succeeded or failed.
 * @param code The deobfuscated code
 */
function writeCodeToFile(code) {
  let outputPath = "output.js";
  // writeFile is asynchronous; the callback receives an error only on failure
  writeFile(outputPath, code, (err) => {
    if (err) {
      console.log("Error writing file", err);
    } else {
      console.log(`Wrote file to ${outputPath}`);
    }
  });
}

// Load the obfuscated sample from disk and run it through the deobfuscator
const obfuscatedSource = readFileSync("./splitConstantsObfuscated.js", "utf8");
deobfuscate(obfuscatedSource);

JAVASCRIPT

After processing the obfuscated script with the babel plugin above, we get the following result:

Post-Deobfuscation Result

1
2
3

let foo = 27;
let bar = 4;
let baz = "I am a string literal, totally whole!";

JAVASCRIPT

And the original code is completely restored!

Example #2: A Confident Complication

If you’ve read my article on String Concealing, specifically the section on String Concatenation, you may know that you can encounter a problem using the babel script above.

Let’s say you have a code snippet like this:

/**
 * Snippet 1
 * The greeting's string literals are split into many small pieces that are
 * concatenated together with this.name, this.school and this.favAnimal.
 */
class Person {
  // Stores the three constructor arguments on the instance
  constructor(name, school, emoji) {
    this.name = name;
    this.school = school;
    this.favAnimal = emoji;
  }

  // Builds the greeting through one long concatenation chain and logs it
  sayHello() {
    let helloStatement =
      "Hello, my name is " +
      this.name +
      ". I g" +
      "o t" +
      "o " +
      this.school +
      " an" +
      "d " +
      "m" +
      "y" +
      " fa" +
      "vo" +
      "ur" +
      "ite" +
      " ani" +
      "ma" +
      "l" +
      " is" +
      " a " +
      this.favAnimal;
    console.log(helloStatement);
  }
}

JAVASCRIPT

By manual inspection, you can probably deduce that it can be reduced to this:

/**
 * Snippet 1, manually deobfuscated
 * Each run of adjacent string pieces has been merged by hand into one literal.
 */
class Person {
  // Stores the three constructor arguments on the instance
  constructor(name, school, animal) {
    this.name = name;
    this.school = school;
    this.favAnimal = animal;
  }
  // Builds the greeting from three merged literals and the instance fields, then logs it
  sayHello() {
    let helloStatement =
      "Hello, my name is " +
      this.name +
      ". I go to " +
      this.school +
      " and my favourite animal is a " +
      this.favAnimal;
    console.log(helloStatement);
  }
}

JAVASCRIPT

However, if we try running the deobfuscator we made above against the obfuscated snippet, it yields this result:

/**
 * Snippet 1, processed through the deobfuscator
 * The concatenation chain is unchanged from the input: none of the string
 * pieces were merged.
 */
class Person {
  constructor(name, school, emoji) {
    this.name = name;
    this.school = school;
    this.favAnimal = emoji;
  }

  sayHello() {
    let helloStatement =
      "Hello, my name is " +
      this.name +
      ". I g" +
      "o t" +
      "o " +
      this.school +
      " an" +
      "d " +
      "m" +
      "y" +
      " fa" +
      "vo" +
      "ur" +
      "ite" +
      " ani" +
      "ma" +
      "l" +
      " is" +
      " a " +
      this.favAnimal;
    console.log(helloStatement);
  }
}

JAVASCRIPT

It hasn’t been simplified at all! But why?

Where The Issue Lies

To figure out what the problem is, let’s use a debugger and set breakpoints to try and understand what our deobfuscator is actually doing.

[Screenshot not included in this copy of the article]

Placing a debugger statement

We know that our visitor is acting on nodes of type BinaryExpression. A binary expression always has 3 main components: a left side, a right side, and an operator. For our example, the operator is always addition, +. On each iteration, let’s run these commands in the debug console to check what our left and right side are.

generate(path.node).code
generate(path.node.left).code
generate(path.node.right).code

Below is a screencap of what the first two iterations would look like:

[Screenshot not included in this copy of the article]

The first and second pause

When the visitor is first called, path.evaluate() will not return a value and the confident return value will be false. A false value for confident arises when the expression to be evaluated contains a variable whose value is currently unknown, and therefore Babel cannot be “confident” when attempting to compute an expression containing it. In the case of the first expression, the unknown variable (this.favAnimal) on the right side of the expression and the two unknown variables (this.name & this.school) on the left side of the expression prevent path.evaluate() from returning a literal value. When the debugger statement is reached for a second time, the right-hand side of the expression is a StringLiteral (" a "). However, the left-hand side still contains variables with an unknown value. If we were to continue this for each time the breakpoint is encountered, the structure would look like this:

Iteration	Left Side	Operator	Right Side
1	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “ + this.school + “ an” + “d “ + “m” + “y” + “ fa” + “vo” + “ur” + “ite” + “ ani” + “ma” + “l” + “ is” + “ a “	+	this.favAnimal
2	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “ + this.school + “ an” + “d “ + “m” + “y” + “ fa” + “vo” + “ur” + “ite” + “ ani” + “ma” + “l” + “ is”	+	“ a “
3	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “ + this.school + “ an” + “d “ + “m” + “y” + “ fa” + “vo” + “ur” + “ite” + “ ani” + “ma” + “l”	+	“ is”
4	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “ + this.school + “ an” + “d “ + “m” + “y” + “ fa” + “vo” + “ur” + “ite” + “ ani” + “ma”	+	“l”
5	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “ + this.school + “ an” + “d “ + “m” + “y” + “ fa” + “vo” + “ur” + “ite” + “ ani”	+	“ma”
6	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “ + this.school + “ an” + “d “ + “m” + “y” + “ fa” + “vo” + “ur” + “ite”	+	“ ani”
7	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “ + this.school + “ an” + “d “ + “m” + “y” + “ fa” + “vo” + “ur”	+	“ite”
8	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “ + this.school + “ an” + “d “ + “m” + “y” + “ fa” + “vo”	+	“ur”
9	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “ + this.school + “ an” + “d “ + “m” + “y” + “ fa”	+	“vo”
10	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “ + this.school + “ an” + “d “ + “m” + “y”	+	“ fa”
11	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “ + this.school + “ an” + “d “ + “m”	+	“y”
12	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “ + this.school + “ an” + “d “	+	“m”
13	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “ + this.school + “ an”	+	“d “
14	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “ + this.school	+	“ an”
15	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “	+	this.school
16	“Hello, my name is “ + this.name + “. I g” + “o t”	+	“o “
17	“Hello, my name is “ + this.name + “. I g”	+	“o t”
18	“Hello, my name is “ + this.name	+	“. I g”
19	“Hello, my name is “	+	this.name

It’s evident that at every encounter, one of the sides will always contain a variable of unknown value. Therefore, path.evaluate() will return confident: false and be useless in simplifying the expression. So, we’ll need to try something else.

Constructing the Solution

Idea #1: Prioritizing Chunks of Consecutive Literals

We know that the issue lies with one of the sides containing a variable. However, we can see that there are chunks of the code that contain consecutive string literals only:

//Chunk 1
"Hello, my name is ";
//Chunk 2
". I g" + "o t" + "o ";

// Chunk 3
" an" +
  "d " +
  "m" +
  "y" +
  " fa" +
  "vo" +
  "ur" +
  "ite" +
  " ani" +
  "ma" +
  "l" +
  " is" +
  " a ";

JAVASCRIPT

If there were some way to prioritize these smaller chunks, then surely path.evaluate() would be able to simplify them. This is indeed the case, as we can prove this by manually wrapping each of these chunks in parentheses to force them to be evaluated first:

/**
 * Snippet 2: wrapping consecutive strings in parentheses
 * Each run of adjacent string literals is grouped into its own parenthesized
 * sub-expression that contains no this.* references.
 */
class Person {
  // Stores the three constructor arguments on the instance
  constructor(name, school, emoji) {
    this.name = name;
    this.school = school;
    this.favAnimal = emoji;
  }

  // Builds the greeting and logs it; the parenthesized groups hold only literals
  sayHello() {
    let helloStatement =
      "Hello, my name is " +
      this.name +
      (". I g" + "o t" + "o ") +
      this.school +
      (" an" +
        "d " +
        "m" +
        "y" +
        " fa" +
        "vo" +
        "ur" +
        "ite" +
        " ani" +
        "ma" +
        "l" +
        " is" +
        " a ") +
      this.favAnimal;
    console.log(helloStatement);
  }
}

JAVASCRIPT

Running this through the deobfuscator, we get our desired result:

/**
 * Snippet 2, processed through the deobfuscator.
 * Each parenthesized group of literals from the input now appears as a
 * single string.
 */
class Person {
  constructor(name, school, emoji) {
    this.name = name;
    this.school = school;
    this.favAnimal = emoji;
  }

  sayHello() {
    let helloStatement =
      "Hello, my name is " +
      this.name +
      ". I go to " +
      this.school +
      " and my favourite animal is a " +
      this.favAnimal;
    console.log(helloStatement);
  }
}

JAVASCRIPT

Alright, so that did the job. But for very long binary expressions which you might encounter in wild obfuscated scripts, you certainly do not want to have to spend time manually wrapping chunks of consecutive strings in parentheses. Sure, you could probably automate it with Regex, or write an AST-based algorithm to add brackets to the source string, but there has to be a less complicated way, right?

The answer: Yes, there is. And we are on the right track.

Idea #2: Even Smaller Chunks

Okay, so we know that trying to pinpoint and prioritize chunks of consecutive strings with varying lengths can be troublesome. But what if we just split the binary expression into the smallest possible pieces and prioritized those?

I’ll admit, that probably sounds confusing. So I’ll do my best to explain what I mean with an example.

We know that a binary expression, in its simplest form, consists of a left side, an operator, and a right side. Let’s refer back to the first three rows of the table we made:

Iteration	Left Side	Operator	Right Side
1	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “ + this.school + “ an” + “d “ + “m” + “y” + “ fa” + “vo” + “ur” + “ite” + “ ani” + “ma” + “l” + “ is” + “ a “	+	this.favAnimal
2	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “ + this.school + “ an” + “d “ + “m” + “y” + “ fa” + “vo” + “ur” + “ite” + “ ani” + “ma” + “l” + “ is”	+	“ a “
3	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “ + this.school + “ an” + “d “ + “m” + “y” + “ fa” + “vo” + “ur” + “ite” + “ ani” + “ma” + “l”	+	“ is”

The right side of our binary expression is always only one element long, and is either a literal value or an identifier. However, the left side isn’t a single element long. Rather, it’s also a binary expression, containing both literal values and identifiers. What I propose is developing an algorithm to ensure that both the left side and right side are only a single element long. Then, if both are string literals, we can concatenate them. If not, we can simply move on.

To do this, let us look at the above table again, but only the first two rows. However, for the left side, let’s only take into consideration the right-most edge of the expression. That would look something like this:

Iteration	Right Edge of Left Side	Operator	Right Side
1	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “ + this.school + “ an” + “d “ + “m” + “y” + “ fa” + “vo” + “ur” + “ite” + “ ani” + “ma” + “l” + “ is” + “ a “	+	this.favAnimal
2	“Hello, my name is “ + this.name + “. I g” + “o t” + “o “ + this.school + “ an” + “d “ + “m” + “y” + “ fa” + “vo” + “ur” + “ite” + “ ani” + “ma” + “l” + “ is”	+	“ a “

If we were to evaluate these now, we would observe the following:

Row 1 still cannot be evaluated, since it is a StringLiteral + a variable of unknown value.
Row 2 can be simplified into a single string literal: " is" + " a " => " is a "

We can then replace the right-most edge of the left side with the simplified result, so that it becomes the right side in the next iteration. This way, we can simplify consecutive string concatenations step by step. Keep in mind that each simplification affects the next iteration’s right side. The reason I included only the first two rows, and not the third, is that the simplification in the second iteration changes the right side of the third iteration, so it would no longer look like the original table.

Following the new algorithm, each iteration of our visitor would look like this:

Iteration	Right Edge of Left Side	Operator	Right Side
1	“ a “	+	this.favAnimal
2	“ is”	+	“ a “
3	“l”	+	“ is a “
4	“ma”	+	“l is a “
5	“ ani”	+	“mal is a “
6	“ite”	+	“ animal is a “
7	“ur”	+	“ite animal is a “
8	“vo”	+	“urite animal is a “
9	“ fa”	+	“vourite animal is a “
10	“y”	+	“ favourite animal is a “
11	“m”	+	“y favourite animal is a “
12	“d “	+	“my favourite animal is a “
13	“ an”	+	“d my favourite animal is a “
14	this.school	+	“ and my favourite animal is a “
15	“o “	+	this.school
16	“o t”	+	“o “
17	“. I g”	+	“o to “
18	this.name	+	“. I go to “
19	“Hello, my name is “	+	this.name

So, by following this algorithm, we should be able to combine all consecutive string literals into one.

WAIT! There’s something wrong here…

The line of code we start with is let helloStatement = "Hello, my name is " + this.name + ". I g" + "o t" + "o " + this.school + " an" + "d " + "m" + "y" + " fa" + "vo" + "ur" + "ite" + " ani" + "ma" + "l" + " is" + " a " + this.favAnimal;

We know that on the second iteration, " is" and " a " will be concatenated into a single string literal, then the right-most edge of the left side will be replaced with the resulting value. That is,

" is" => " is a "

The problem here is that we are adding the right side of the expression to the right edge of the left side of the expression. However, the original right side remains unchanged despite already being accounted for. The code after one iteration would then look like this:

let helloStatement = "Hello, my name is " + this.name + ". I g" + "o t" + "o " + this.school + " an" + "d " + "m" + "y" + " fa" + "vo" + "ur" + "ite" + " ani" + "ma" + "l" + " is a " + " a " + this.favAnimal;

Notice the extra duplicate near the end, " is a " + " a ". To fix this, we need to ensure that we delete the original right side of the expression after doing the concatenation and replacement.

So, based on this logic, the correct steps for creating the deobfuscator are as follows:

Writing The Deobfuscator Logic

Traverse the ast in search of BinaryExpressions. When one is encountered:
1. If both the right side (path.node.right) and the left side (path.node.left) are of type StringLiteral, we can use the algorithm for the basic case.
2. If not:
  1. Check if the right side, (path.node.right) is a StringLiteral. If it isn’t, skip this node by returning.
  2. Check if the right-most edge of the left-side (path.node.left.right) is a StringLiteral. If it isn’t, skip this node by returning.
  3. Check if the operator is addition (+). If it isn’t, skip this node by returning.
  4. Evaluate the right-most edge of the left-side + the right side; path.node.left.right.value + path.node.right.value and assign its StringLiteral representation to a variable, concatResult.
  5. Replace the right-most edge of the left-side with concatResult.
  6. Remove the original right side of the expression as it is now a duplicate.

The Babel implementation is as follows:

Babel Deobfuscation Script

/**
 * Deobfuscator.js
 * The babel script used to deobfuscate the target file
 *
 */
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const t = require("@babel/types");
const generate = require("@babel/generator").default;
const beautify = require("js-beautify");
const { readFileSync, writeFile } = require("fs");

/**
 * Main function to deobfuscate the code.
 * Merges runs of consecutive string literals joined by `+` into a single
 * string literal, then beautifies the regenerated code and passes it to
 * writeCodeToFile.
 * @param source The source code of the file to be deobfuscated
 *
 */
function deobfuscate(source) {
  //Parse AST of Source Code
  const ast = parser.parse(source);

  // Visitor for constant folding
  const foldConstantsVisitor = {
    BinaryExpression(path) {
      const left = path.get("left");
      const right = path.get("right");
      const { operator } = path.node;

      if (t.isStringLiteral(left.node) && t.isStringLiteral(right.node)) {
        // Basic case: both operands are string literals, so the whole
        // expression can be evaluated statically.
        const { confident, value } = path.evaluate();
        // Skip if not confident
        if (!confident) return;
        // Create a new node, infer the type
        const actualVal = t.valueToNode(value);
        // Skip if the result is not a StringLiteral (e.g. "a" - "b" or "a" == "b")
        if (!t.isStringLiteral(actualVal)) return;
        // Replace the BinaryExpression with the simplified value
        path.replaceWith(actualVal);
        return;
      }

      // Nested case: (<anything> + "lit1") + "lit2"  =>  <anything> + "lit1lit2"

      // Check if the operator is addition (+). If it isn't, skip this node by returning.
      if (operator !== "+") return;
      // Check if the right side is a StringLiteral. If it isn't, skip this node by returning.
      if (!t.isStringLiteral(right.node)) return;
      // The left side must itself be an addition. Without this check an
      // expression such as `a - "1" + "2"` or `(a || "x") + "y"` would be
      // rewritten to `a - "12"` / `a || "xy"`, which changes its meaning.
      if (!t.isBinaryExpression(left.node, { operator: "+" })) return;
      // Check if the right-most edge of the left side is a StringLiteral. If it isn't, skip this node by returning.
      if (!t.isStringLiteral(left.node.right)) return;

      // If all conditions are fine:

      // Evaluate the _right-most edge of the left-side_ + the right side;
      const concatResult = t.stringLiteral(
        left.node.right.value + right.node.value
      );
      // Replace the _right-most edge of the left-side_ with `concatResult`.
      left.get("right").replaceWith(concatResult);
      // Remove the original right side of the expression as it is now a duplicate.
      // Removing one operand of a binary expression makes Babel replace the
      // whole expression with the remaining operand (here: the updated left side).
      right.remove();
    },
  };

  // Execute the visitor
  traverse(ast, foldConstantsVisitor);

  // Code Beautification
  let deobfCode = generate(ast, { comments: false }).code;
  deobfCode = beautify(deobfCode, {
    indent_size: 2,
    space_in_empty_paren: true,
  });
  // Output the deobfuscated result
  writeCodeToFile(deobfCode);
}
/**
 * Saves the deobfuscated code to output.js.
 * The write is asynchronous; success or failure is reported on the console.
 * @param code The deobfuscated code
 */
function writeCodeToFile(code) {
  const outputPath = "output.js";
  // Completion callback: report the error if there is one, otherwise confirm the write
  const reportResult = (err) => {
    if (!err) {
      console.log(`Wrote file to ${outputPath}`);
      return;
    }
    console.log("Error writing file", err);
  };
  writeFile(outputPath, code, reportResult);
}

deobfuscate(readFileSync("./splitStringsObfuscated.js", "utf8"));

JAVASCRIPT

After processing the obfuscated script with the babel plugin above, we get the following result:

Post-Deobfuscation Result

class Person {
  constructor(name, school, emoji) {
    this.name = name;
    this.school = school;
    this.favAnimal = emoji;
  }

  sayHello() {
    let helloStatement =
      "Hello, my name is " +
      this.name +
      ". I go to " +
      this.school +
      " and my favourite animal is a " +
      this.favAnimal;
    console.log(helloStatement);
  }
}

JAVASCRIPT

And all consecutive StringLiterals have been concatenated! Huzzah!

Conclusion

Okay, I hope that second example wasn’t too confusing. Sometimes, you’ll be able to avoid the problem with unknown variable values by replacing references to a constant variable with their actual value. If you’re interested in that, you can read my article about it here. In this case, however, it was unavoidable due to being within a class definition where variables have yet to be initialized.

Keep in mind that the second example will only work for String Literals and addition. But, it can easily be adapted to other node types and operators. I’ll leave that as a challenge for you, dear reader, if you wish to pursue it further 😉

If you’re interested, you can find the source code for all the examples in this repository.

Anyways, that’s all I have for you today. I hope that this article helped you learn something new. Thanks for reading, and happy reversing!

---------------------------------------------------------------------------

Preface

This article assumes a preliminary understanding of Abstract Syntax Tree structure and BabelJS. Click Here to read my introductory article on the usage of Babel.

Definition of a Constant Variable

For our purposes, a constant variable is any variable that meets all three of the following conditions:

The variable is declared AND initialized at the same time.
The variable is initialized to a literal value, e.g. StringLiteral, NumericLiteral, BooleanLiteral, etc.
The variable is never reassigned another value in the script

Therefore, a variable’s declaration keyword (let,var,const) has no bearing on whether or not it is a constant.

Here is a quick example:

const a = [1, 2, 3]; // not a constant: initialized to an ArrayExpression, not a Literal
var d = 12; // constant: initialized to a NumericLiteral and never reassigned
let e = "String!"; // constant: initialized to a StringLiteral and never reassigned
let f = 13; // not a constant: reassigned below
let g; // not a constant: declared without an initializer

f += 2;

// Only declared variables are logged; an undeclared name here would throw a ReferenceError.
console.log(a, d, e, f);
g = 14;

JAVASCRIPT

In this example:

a is not a constant, since it’s initialized as an ArrayExpression, not a Literal
d is a constant, as it is declared and initialized to a NumericLiteral. Declaration and initialization happen at the same time. It is also never reassigned.
e is a constant, as it is declared and initialized to a StringLiteral. Declaration and initialization happen at the same time. It is also never reassigned.
f is not a constant, since it is reassigned after initialization: f+=2
g is not a constant, since it is not declared and initialized at the same time.

The reasoning for declared but uninitialized variables not counting as a constant is an important concept to understand. Take the following script as an example:

let foo; // Declaration only: no initial value is given here

console.log(foo); // => undefined

foo = 2;

console.log(foo); // => 2

JAVASCRIPT

Console Output:

undefined
2

If, in this case, we tried to substitute foo’s assigned value (2) for each reference of foo:

let foo; // Declaration only: no initial value is given here

console.log(2); // => 2, NOT undefined!

foo = 2;

console.log(2); // => 2

JAVASCRIPT

Console Output:

2
2

Which clearly breaks the original functionality of the script due to not accounting for the state of the variable at certain points in the script. Therefore, we must follow the 3 conditions when determining a constant variable.

I’ll now discuss an example where substituting in constant variables can be useful for deobfuscation purposes.

Examples

Let’s say we have a very simple, unobfuscated script that looks like this:

/**
 * "Input.js"
 * Original, unobfuscated code.
 *
 */
var url = "https://api.n0tar3als1t3.dev:1423/getData";
const req = function () {
  let random = Math.random() * 1000;
  var xhr = new XMLHttpRequest();
  xhr.open("GET", url);
  xhr.setRequestHeader("RandomInt", random);
  xhr.setRequestHeader("Accept", "text/html");
  xhr.setRequestHeader("Accept-Encoding", "gzip, deflate, br");
  xhr.setRequestHeader("Accept-Language", "en-US,en;q=0.9");
  xhr.setRequestHeader("Cache-Control", "no-cache");
  xhr.setRequestHeader("Connection", "keep-alive");
  xhr.setRequestHeader("Host", "n0tar3als1t3.dev");
  xhr.setRequestHeader("Pragma", "no-cache");
  xhr.setRequestHeader("Referer", "https://n0tar3als1t3.dev");
  xhr.setRequestHeader(
    `sec-ch-ua", "" Not A;Brand";v="99", "Chromium";v="101", "Google Chrome";v="101"`
  );
  xhr.setRequestHeader("sec-ch-ua-mobile", "?0");
  xhr.setRequestHeader("sec-ch-ua-platform", `"Windows"`);
  xhr.setRequestHeader("Sec-Fetch-Dest", "empty");
  xhr.setRequestHeader("Sec-Fetch-Mode", "cors");
  xhr.setRequestHeader("Sec-Fetch-Site", "same-origin");
  xhr.setRequestHeader(
    "User-Agent",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36"
  );

  xhr.onreadystatechange = function () {
    if (xhr.readyState === 4) {
      console.log(xhr.status);
      console.log(xhr.responseText);
    }
  };

  xhr.send();
};

JAVASCRIPT

We can obfuscate it by replacing all references to the string literals with references to constant variables:

/**
 * "constantReferencesObfuscated.js"
 * This is the resulting code after obfuscation.
 *
 */

const QY$e_yOs = "https://api.n0tar3als1t3.dev:1423/getData";
let apNykoxUn = "sec-ch-ua-mobile";
const zgDT = "Connection";
let A$E =
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36";
const XVyy$qGVDc = "Sec-Fetch-Dest";
var EkoMLkb = "Cache-Control";
let $jAONLEC = "Host";
var PGOSDhGVlcd = "https://n0tar3als1t3.dev";
const m$ua = "Accept-Encoding";
var Hw$seiMEes = "Pragma";
const ZHCx = "Sec-Fetch-Site";
var PfxQUj = "Referer";
const e_WXHbgheSe = "Accept";
const _VTGows = "GET";
var kphzJIkbgb = "gzip, deflate, br";

const req = function () {
  const SNgfg = "no-cache";
  let vOqEy = "text/html";
  const uugBXYcdsHp = "same-origin";
  const AH$HwC = "Accept-Language";
  var PnAJsD =
    'sec-ch-ua", "" Not A;Brand";v="99", "Chromium";v="101", "Google Chrome";v="101"';
  const Svno = "n0tar3als1t3.dev";
  let OTCqIvdmed = '"Windows"';
  let mVu = "RandomInt";
  const UgLln = "empty";
  const HwjBe = "?0";
  var QnXFnewjh = "Sec-Fetch-Mode";
  var lGhlU$gqPoK = "cors";
  const GcictYiOQ = "User-Agent";
  const AfYNl = "no-cache";
  var cLAVjnFa = "keep-alive";
  var V$lt = "en-US,en;q=0.9";
  const TlMBXe = "sec-ch-ua-platform";
  let random = Math.random() * 1000;
  var xhr = new XMLHttpRequest();
  var url = QY$e_yOs;
  xhr.open(_VTGows, url);
  xhr.setRequestHeader(mVu, random);
  xhr.setRequestHeader(e_WXHbgheSe, vOqEy);
  xhr.setRequestHeader(m$ua, kphzJIkbgb);
  xhr.setRequestHeader(AH$HwC, V$lt);
  xhr.setRequestHeader(EkoMLkb, SNgfg);
  xhr.setRequestHeader(zgDT, cLAVjnFa);
  xhr.setRequestHeader($jAONLEC, Svno);
  xhr.setRequestHeader(Hw$seiMEes, AfYNl);
  xhr.setRequestHeader(PfxQUj, PGOSDhGVlcd);
  xhr.setRequestHeader(PnAJsD);
  xhr.setRequestHeader(apNykoxUn, HwjBe);
  xhr.setRequestHeader(TlMBXe, OTCqIvdmed);
  xhr.setRequestHeader(XVyy$qGVDc, UgLln);
  xhr.setRequestHeader(QnXFnewjh, lGhlU$gqPoK);
  xhr.setRequestHeader(ZHCx, uugBXYcdsHp);
  xhr.setRequestHeader(GcictYiOQ, A$E);

  xhr.onreadystatechange = function () {
    if (xhr.readyState === 4) {
      console.log(xhr.status);
      console.log(xhr.responseText);
    }
  };

  xhr.send();
};

JAVASCRIPT

Analysis Methodology

Obviously, the obfuscated script is much more difficult to read. If you were to manually deobfuscate it, you’d have to search up each referenced variable and replace each occurrence of it with the actual value. That could get tedious for a large number of variables, so we’re going to do it the Babel way. As always, let’s start by pasting the code into AST Explorer.

[Image unavailable: the upload failed in the source post]

View of the obfuscated code in AST Explorer

Our targets of interest are the extra variable declarations. Let’s take a closer look at one of them:

[Image unavailable: the upload failed in the source post]

A closer look at one of the nodes of interest

So, the target node type appears to be of type VariableDeclaration. However, each of these VariableDeclarations contains an array of VariableDeclarators. It is the VariableDeclarator that actually contains the information of the variables, including its id and init values. So, the actual node type we should focus on is VariableDeclarator.

Recall that we want to identify all constant variables, then replace all their references with their actual value. It’s important to note that variables in different scopes (e.g. local vs. global), may share the same name but have different values. So, the solution isn’t as simple as blindly replacing all matching identifiers with their initial value.

This would be a convoluted process if not for Babel’s ‘Scope’ API. I won’t dive too deep into the available scope APIs, but you can refer to the Babel Plugin Handbook to learn more about them. In our case, the scope.getBinding(${identifierName}) method will be incredibly useful for us, as it directly returns information regarding if a variable is constant and all of its references.

Putting all this knowledge together, the steps for creating the deobfuscator are as follows:

Traverse the ast in search of VariableDeclarators. If one is found:
1. Check if the variable is initialized. If it is, check that the initial value is a Literal type. If not, skip the node by returning.
2. Use the path.scope.getBinding(${identifierName}) method with the name of the current variable as the argument.
3. Store the returned constant and referencePaths properties in their own respective variables.
4. Check if the constant property is true. If it isn’t, skip the node by returning.
5. Loop through all NodePaths in the referencePaths array, and replace them with the current VariableDeclarator’s initial value (path.node.init)
6. After finishing the loop, remove the original VariableDeclarator node since it has no further use.

The babel implementation is shown below:

Babel Deobfuscation Script

/**
 * Deobfuscator.js
 * The babel script used to deobfuscate the target file
 *
 */
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const t = require("@babel/types");
const generate = require("@babel/generator").default;
const beautify = require("js-beautify");
const { readFileSync, writeFile } = require("fs");

/**
 * Main function to deobfuscate the code.
 * Replaces every reference to a constant variable (declared with a literal
 * initializer and never reassigned) with that literal, removes the
 * declaration, then beautifies the regenerated code and passes it to
 * writeCodeToFile.
 * @param source The source code of the file to be deobfuscated
 *
 */
function deobfuscate(source) {
  //Parse AST of Source Code
  const ast = parser.parse(source);

  // Visitor for replacing constants

  const replaceRefsToConstants = {
    VariableDeclarator(path) {
      const { id, init } = path.node;
      // Destructuring patterns (e.g. `const [a] = "x"`) have no single name
      // to look up, so only plain identifiers are handled.
      if (!t.isIdentifier(id)) return;
      // Ensure the variable is initialized to a Literal type.
      if (!t.isLiteral(init)) return;
      // t.isLiteral also matches these two, which are not safe to inline:
      // each evaluation of a regex literal creates a new RegExp object, and a
      // template literal with expressions is not a fixed value.
      if (t.isRegExpLiteral(init)) return;
      if (t.isTemplateLiteral(init) && init.expressions.length > 0) return;

      const binding = path.scope.getBinding(id.name);
      if (!binding) return;
      const { constant, referencePaths } = binding;
      // Make sure it's constant
      if (!constant) return;
      // Loop through all references and replace them with the actual value.
      // Each reference gets its own copy so that the same node object is not
      // shared between several places in the AST.
      for (const referencePath of referencePaths) {
        referencePath.replaceWith(t.cloneNode(init));
      }
      // Delete the now useless VariableDeclarator
      path.remove();
    },
  };

  // Execute the visitor
  traverse(ast, replaceRefsToConstants);

  // Code Beautification
  let deobfCode = generate(ast, { comments: false }).code;
  deobfCode = beautify(deobfCode, {
    indent_size: 2,
    space_in_empty_paren: true,
  });
  // Output the deobfuscated result
  writeCodeToFile(deobfCode);
}
/**
 * Saves the deobfuscated code to output.js.
 * The write is asynchronous; success or failure is reported on the console.
 * @param code The deobfuscated code
 */
function writeCodeToFile(code) {
  const outputPath = "output.js";
  // Completion callback: report the error if there is one, otherwise confirm the write
  const reportResult = (err) => {
    if (!err) {
      console.log(`Wrote file to ${outputPath}`);
      return;
    }
    console.log("Error writing file", err);
  };
  writeFile(outputPath, code, reportResult);
}

deobfuscate(readFileSync("./constantReferencesObfuscated.js", "utf8"));

JAVASCRIPT

After processing the obfuscated script with the babel plugin above, we get the following result:

Post-Deobfuscation Result

const req = function () {
  let random = Math.random() * 1000;
  var xhr = new XMLHttpRequest();
  xhr.open("GET", "https://api.n0tar3als1t3.dev:1423/getData");
  xhr.setRequestHeader("RandomInt", random);
  xhr.setRequestHeader("Accept", "text/html");
  xhr.setRequestHeader("Accept-Encoding", "gzip, deflate, br");
  xhr.setRequestHeader("Accept-Language", "en-US,en;q=0.9");
  xhr.setRequestHeader("Cache-Control", "no-cache");
  xhr.setRequestHeader("Connection", "keep-alive");
  xhr.setRequestHeader("Host", "n0tar3als1t3.dev");
  xhr.setRequestHeader("Pragma", "no-cache");
  xhr.setRequestHeader("Referer", "https://n0tar3als1t3.dev");
  xhr.setRequestHeader(
    'sec-ch-ua", "" Not A;Brand";v="99", "Chromium";v="101", "Google Chrome";v="101"'
  );
  xhr.setRequestHeader("sec-ch-ua-mobile", "?0");
  xhr.setRequestHeader("sec-ch-ua-platform", '"Windows"');
  xhr.setRequestHeader("Sec-Fetch-Dest", "empty");
  xhr.setRequestHeader("Sec-Fetch-Mode", "cors");
  xhr.setRequestHeader("Sec-Fetch-Site", "same-origin");
  xhr.setRequestHeader(
    "User-Agent",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36"
  );

  xhr.onreadystatechange = function () {
    if (xhr.readyState === 4) {
      console.log(xhr.status);
      console.log(xhr.responseText);
    }
  };

  xhr.send();
};

JAVASCRIPT

And the code is restored. Even better than the original actually, since we substituted in the url variable too!

Conclusion

Substitution of constant variables is a must-know deobfuscation technique. It’ll usually be one of your first steps in the deobfuscation, combined with constant folding. If you would like to learn about constant folding, you can read my article about it here.

This article also gave a nice introduction to one of the useful Babel API methods. Unfortunately, there isn’t much good documentation out there aside from the Babel Plugin Handbook. However, you can discover a lot more useful features Babel has to offer by reading its source code, or using the debugger of an IDE to list and test helper methods (the latter of which I personally prefer 😄).

If you’re interested, you can find the source code for all the examples in this repository.

Okay, that’s all I have for you today. I hope that this article helped you learn something new. Thanks for reading, and happy reversing!

---------------------------------------------------------------------------

Preface

This article assumes a preliminary understanding of Abstract Syntax Tree structure and BabelJS. Click Here to read my introductory article on the usage of Babel.

Definitions

Both dead code and unreachable code are obfuscation techniques relying on the injection of junk code that does not alter the main functionality of a program. Their only purpose is to bloat the appearance of the source code to make it more confusing for a human to analyze. Though being similar, there’s a slight difference between the two.

What is Dead Code?

Dead code is a section of code that is executed, but its results are never used in the rest of the program. In addition to increasing the file size, dead code also increases the program runtime and CPU usage since it is being executed.

What is unreachable code?

Unreachable code is a section of code that is never executed, since there is no existing control flow path that leads to its execution. This results in an increase in file size, but shouldn’t affect the runtime of the program since its contents are never executed.

Examples

Example 1: [Dead Code] Unused Variables and Functions

An unused variable/function is something that is declared (and often, but not always, initialized), but is never used in the program. Therefore, for a variable to be unused, it must:

Be constant, and
Have no references

The following obfuscated script contains many of them:

/**
 * unreferencedVariablesObfuscated.js
 * Lots of useless variables!
 */

var a = 3;
var b;
function useless1(yeet) {
  console.log(yeet);
  console.log("I'm useless :(");
}
const c = 7;
let d = 293;
const u = "My favourite";
var useless2 = function (bruh) {
  console.log(bruh.useless, ":(");
};

function notHello() {
  console.log("No!");
  console.log(i + lmaod, u, el, dgajd, fg + lmaod);
}
let dbgad = 23172;
let dgajd = "is";
var i = "Hello";
let vnajkfdhg;
var dnakd;
let bvs = new Date();
var h = "yeet";
let bv = 23;
var fg = "steak";
var lmaod = "!";
const el = "food";
let n = 1363;
let vch = "dghda" + 2;
let lol = performance.now();
const sayHello = function () {
  console.log(i + lmaod, u, el, dgajd, fg + lmaod);
};
let dga = 3653817;
let sfa = "362813";
sayHello();
let lmao;
var fldfioan;

JAVASCRIPT

Analysis Methodology

Let’s begin our analysis by pasting the obfuscated script into AST Explorer

[Image unavailable: the upload failed in the source post]

A view of the obfuscated code in AST Explorer

We can observe from the AST structure that each new variable creation results in the creation of one of two types of nodes:

A VariableDeclaration, for variables declared with let, var, and const. Each of these VariableDeclarations contains an array of VariableDeclarators. It is the VariableDeclarator that actually contains the information of the variables, including its id and init values. So, we can make VariableDeclarator nodes our focus of interest to avoid unnecessary extra traversals.
A FunctionDeclaration, for functions declared with a function statement.

Based on this, we can deem our target node types of interest to be VariableDeclarator and FunctionDeclaration.

Recall that we want to identify all variables that are both constant and non-referenced, then remove them. It’s important to note that variables in different scopes (e.g. local vs. global), may share the same name but have different values. So, we cannot simply base our solution on how many times a variable name occurs in a program.

We can use our observations to construct the following deobfuscation logic:

Traverse the AST for VariableDeclarators and FunctionDeclarations. Since both node types have the id property, we can write a single plugin for both.
- Tip: To write a function that works for multiple visitor nodes, we can add a | separating them in the method name as a string like this: "VariableDeclarator|FunctionDeclaration"
Use the path.scope.getBinding(${identifierName}) method with the name of the current variable as the argument.
If the method returns constant as true and referenced as false, the declaration is considered to be dead code and can be safely removed with path.remove()

The babel implementation is shown below:

Babel Implementation

/**
 * Deobfuscator.js
 * The babel script used to deobfuscate the target file
 *
 */
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const t = require("@babel/types");
const generate = require("@babel/generator").default;
const beautify = require("js-beautify");
const { readFileSync, writeFile } = require("fs");
const { Referenced } = require("@babel/traverse/lib/path/lib/virtual-types");
const { constants } = require("buffer");

/**
 * Main function to deobfuscate the code.
 * Removes every variable and function declaration whose binding is constant
 * and never referenced, then beautifies the regenerated code and passes it
 * to writeCodeToFile.
 * @param source The source code of the file to be deobfuscated
 *
 */
function deobfuscate(source) {
  //Parse AST of Source Code
  const ast = parser.parse(source);

  // Visitor for removing unused variables and functions
  const removedUnusedVariablesVisitor = {
    "VariableDeclarator|FunctionDeclaration"(path) {
      const { node, scope } = path;
      // Skip declarations without a plain identifier name: destructuring
      // patterns and anonymous `export default function () {}` (id is null).
      if (!t.isIdentifier(node.id)) return;
      // For a FunctionDeclaration, path.scope is the function's own scope,
      // while the function's name is bound in the enclosing scope. Looking it
      // up from the inner scope could find a same-named parameter or local.
      const declaringScope = path.isFunctionDeclaration() ? scope.parent : scope;
      const binding = declaringScope.getBinding(node.id.name);
      if (!binding) return;
      const { constant, referenced } = binding;
      // If the variable is constant and never referenced, remove it.
      // NOTE: the initializer is removed together with the declaration, even
      // when evaluating it has side effects (e.g. a function call).
      if (constant && !referenced) {
        path.remove();
      }
    },
  };

  // Execute the visitor
  traverse(ast, removedUnusedVariablesVisitor);

  // Code Beautification
  let deobfCode = generate(ast, { comments: false }).code;
  deobfCode = beautify(deobfCode, {
    indent_size: 2,
    space_in_empty_paren: true,
  });
  // Output the deobfuscated result
  writeCodeToFile(deobfCode);
}
/**
 * Saves the deobfuscated code to output.js.
 * The write is asynchronous; success or failure is reported on the console.
 * @param code The deobfuscated code
 */
function writeCodeToFile(code) {
  const outputPath = "output.js";
  // Completion callback: report the error if there is one, otherwise confirm the write
  const reportResult = (err) => {
    if (!err) {
      console.log(`Wrote file to ${outputPath}`);
      return;
    }
    console.log("Error writing file", err);
  };
  writeFile(outputPath, code, reportResult);
}

deobfuscate(readFileSync("./unreferencedVariablesObfuscated.js", "utf8"));

JAVASCRIPT

After processing the obfuscated script with the babel plugin above, we get the following result:

Post-Deobfuscation Result

const u = "My favourite";
let dgajd = "is";
var i = "Hello";
var fg = "steak";
var lmaod = "!";
const el = "food";

const sayHello = function () {
  console.log(i + lmaod, u, el, dgajd, fg + lmaod);
};

sayHello();

JAVASCRIPT

And all non-referenced variables are removed.

Extra: By manual analysis, you can probably realize that all the variables declared above the sayHello function can have their values substituted in place of their identifiers inside of the console.log statement. How to accomplish that is outside the scope of this article. But, if you’re interested in learning how to do it, you can read my article about it here.

Example 2: [Dead Code] Empty Statements

An empty statement is simply a semi-colon (;) with no same-line code before it. This script is littered with them:

/**
 * emptyStatementSrc.js
 * Ugly 'obfuscated' code.
*/
;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;
const a = 2;;;;;;;;;
;;;;;;;;;
;;;;
;;;;;;;;;;;;;;
;;;;;
;;;;;;;;;;;;;;;;;;
const b = 3;;;;;;;;;;;;;;;
;;;;;;;;;;;
;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;
;;;;;;;
;;;
;;
console.log("a is", a, "b is", b);;;;;;;;;;;;;;;

JAVASCRIPT

The presence of empty statements doesn’t really add much to obfuscation, but removing them will still remove unnecessary noise and optimize the appearance.

Analysis Methodology

We begin by pasting the example obfuscated script into AST Explorer

[Image missing: the screenshot was not uploaded when this article was copied]

A view of the obfuscated code in AST Explorer

We can observe that the top-level view is polluted by EmptyStatement nodes, causing a slight inconvenience when trying to navigate through the AST structure.

The deobfuscator logic is very simple:

Traverse the AST for EmptyStatement nodes.
When one is encountered, delete it with path.remove()

The babel implementation is shown below:

/**
 * Deobfuscator.js
 * The babel script used to deobfuscate the target file
 *
 */
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const t = require("@babel/types");
const generate = require("@babel/generator").default;
const beautify = require("js-beautify");
const { readFileSync, writeFile } = require("fs");

/**
 * Entry point of the deobfuscator: strips every EmptyStatement from the
 * given source, regenerates and beautifies the code, then saves it.
 * @param source The source code of the file to be deobfuscated
 *
 */
function deobfuscate(source) {
  // Build the AST for the incoming source text
  const tree = parser.parse(source);

  // Each EmptyStatement node is removed from the tree as soon as it is visited
  traverse(tree, {
    EmptyStatement(emptyStatementPath) {
      emptyStatementPath.remove();
    },
  });

  // Regenerate the code without comments, then pretty-print it
  const { code: generatedCode } = generate(tree, { comments: false });
  const beautifyOptions = {
    indent_size: 2,
    space_in_empty_paren: true,
  };
  const prettyCode = beautify(generatedCode, beautifyOptions);

  // Persist the deobfuscated result
  writeCodeToFile(prettyCode);
}
/**
 * Saves the deobfuscated code under the fixed name output.js and logs
 * whether the write succeeded or failed.
 * @param code The deobfuscated code
 */
function writeCodeToFile(code) {
  const outputPath = "output.js";

  // Invoked by writeFile once the write attempt has finished.
  const reportOutcome = (err) => {
    if (!err) {
      console.log(`Wrote file to ${outputPath}`);
      return;
    }
    console.log("Error writing file", err);
  };

  writeFile(outputPath, code, reportOutcome);
}

// Script entry point: synchronously read the obfuscated sample as UTF-8 text and pass it to deobfuscate().
deobfuscate(readFileSync("./emptyStatementSrc.js", "utf8"));

JAVASCRIPT

After processing the obfuscated script with the babel plugin above, we get the following result:

Post-Deobfuscation Result

const a = 2;
const b = 3;
console.log("a is", a, "b is", b);

JAVASCRIPT

And all of the useless EmptyStatements are now removed, enabling easier reading and navigation through the AST.

Example 3: [Unreachable Code] If Statements and Logical Expressions

Let’s take the following ‘obfuscated’ code snippet as an example:

/**
 * unreachableLogicalCodeObfuscated.js
 */

if (!![]) {
  console.log("This always runs! 1");
} else {
  console.log("This never runs.");
}

if (40 > 80) {
  console.log("This never runs.");
} else if (1 < 2) {
  console.log("This always runs! 2");
} else {
  console.log("This never runs.");
}

![] ? console.log("This never runs.") : console.log("This always runs! 3");

// Chained example

if (!![]) {
  console.log("This always runs! 4");
  ![]
    ? console.log("This never runs.")
    : false
    ? console.log("This never runs")
    : 40 < 20
    ? console.log("This never runs.")
    : 80 > 1
    ? console.log("This always runs! 5")
    : 40 > 2
    ? console.log("This never runs.")
    : console.log("This never runs.");
} else {
  console.log("This never runs.");
}

JAVASCRIPT

This is an extremely simple example, especially since the console.log calls tell you exactly what runs and what doesn’t. Keep in mind that code you find in the wild will probably be layered with other obfuscation techniques too. However, this is a good example for understanding the core concept. Rest assured though: the method I’ll discuss should still be universal to all types of unreachable code obfuscation.

Analysis Methodology

As always, we start our analysis by pasting the code into AST Explorer

[Image missing: the screenshot was not uploaded when this article was copied]

A view of the obfuscated code in AST Explorer

We can see that at the top level, the file consists of IfStatements and an ExpressionStatement. If we expand the ExpressionStatement, we can see that ternary operator logical expressions are actually of type ConditionalExpression. Expanding an IfStatement and a ConditionalExpression for comparison, we can notice some similarities:

[Image missing: the screenshot was not uploaded when this article was copied]

Comparison of IfStatement vs. ConditionalExpression nodes

We can see that, aside from their type, an IfStatement and a ConditionalExpression have the exact same structure. That is, both contain:

A test property, which is the test condition. It will either evaluate to true or false.
A consequent property, which contains the code to be executed if test evaluates to a truthy value.
An alternate property, which contains the code to be executed if test evaluates to a falsy value.
- Note: This property is optional, since an If Statement need not be accompanied by an else.

For our deobfuscator, we’ll make use of one of the babel API methods: NodePath.evaluateTruthy().

This method takes in a path, then evaluates its node. If the node evaluates to a truthy value, the method returns true. If the node evaluates to a falsy value, the method returns false. See: Truthy Values vs. Falsy Values

The steps for creating the deobfuscator are as follows:

Traverse the AST for IfStatements and ConditionalExpressions. Since both node types have the same structure, we can write a single plugin for both.
- Tip: To write a function that works for multiple visitor nodes, we can add a | separating them in the method name as a string, like this: "IfStatement|ConditionalExpression"
When one is encountered, use the NodePath.evaluateTruthy() method on the test property’s NodePath.
If the NodePath.evaluateTruthy() method returns true:
1. Replace the path with the contents of consequent.
2. (Optional) If the consequent is contained within a BlockStatement (curly braces), we can replace it with the consequent.body to get rid of the curly braces.
If the NodePath.evaluateTruthy() method returns false:
1. If the alternate property exists, replace the path with its contents.
2. (Optional) If the alternate is contained within a BlockStatement (curly braces), we can replace it with the alternate.body to get rid of the curly braces.

The babel implementation is shown below:

Babel Implementation

/**
 * Deobfuscator.js
 * The babel script used to deobfuscate the target file
 *
 */
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;
const t = require("@babel/types");
const generate = require("@babel/generator").default;
const beautify = require("js-beautify");
const { readFileSync, writeFile } = require("fs");

/**
 * Main function to deobfuscate the code.
 * Parses the source, statically resolves every IfStatement and
 * ConditionalExpression whose test evaluateTruthy() reports as strictly
 * true or false, then regenerates, beautifies and writes out the code.
 * @param source The source code of the file to be deobfuscated
 *
 */
function deobfuscate(source) {
  //Parse AST of Source Code
  const ast = parser.parse(source);

  // True for statements that create a binding scoped to the enclosing block.
  // Lifting such a statement out of its braces would move the binding into the
  // parent scope, where it can shadow or collide with an existing name
  // (e.g. `let a; if (true) { let a; }` would become a redeclaration).
  const declaresBlockScopedBinding = (statement) =>
    t.isFunctionDeclaration(statement) ||
    t.isClassDeclaration(statement) ||
    (t.isVariableDeclaration(statement) && statement.kind !== "var");

  // Replaces `path` with the branch that is known to run.
  const replaceWithBranch = (path, branch) => {
    if (!t.isBlockStatement(branch)) {
      // Single expression (ternary branch) or single statement (e.g. `else if`):
      // a one-for-one swap, so replaceWith is the matching API.
      path.replaceWith(branch);
      return;
    }
    if (branch.body.length === 0) {
      // `{}` contributes nothing, so the whole statement can go.
      path.remove();
      return;
    }
    if (branch.body.some(declaresBlockScopedBinding)) {
      // Keep the braces so block-scoped declarations stay in their own scope.
      path.replaceWith(branch);
      return;
    }
    // Safe to drop the curly braces and splice the statements in place.
    path.replaceWithMultiple(branch.body);
  };

  // Visitor for simplifying if statements and logical statements
  const simplifyIfAndLogicalVisitor = {
    "ConditionalExpression|IfStatement"(path) {
      const { consequent, alternate } = path.node;
      const value = path.get("test").evaluateTruthy();
      if (value === true) {
        replaceWithBranch(path, consequent);
      } else if (value === false) {
        if (alternate != null) {
          replaceWithBranch(path, alternate);
        } else {
          // An `if` without `else` whose test is false never runs anything.
          path.remove();
        }
      }
      // Any other result means the test could not be decided statically,
      // so the node is left untouched.
    },
  };

  // Execute the visitor
  traverse(ast, simplifyIfAndLogicalVisitor);

  // Code Beautification
  let deobfCode = generate(ast, { comments: false }).code;
  deobfCode = beautify(deobfCode, {
    indent_size: 2,
    space_in_empty_paren: true,
  });
  // Output the deobfuscated result
  writeCodeToFile(deobfCode);
}
/**
 * Saves the deobfuscated code under the fixed name output.js and logs
 * whether the write succeeded or failed.
 * @param code The deobfuscated code
 */
function writeCodeToFile(code) {
  const outputPath = "output.js";

  // Invoked by writeFile once the write attempt has finished.
  const reportOutcome = (err) => {
    if (!err) {
      console.log(`Wrote file to ${outputPath}`);
      return;
    }
    console.log("Error writing file", err);
  };

  writeFile(outputPath, code, reportOutcome);
}

// Script entry point: synchronously read the obfuscated sample as UTF-8 text and pass it to deobfuscate().
deobfuscate(readFileSync("./unreachableLogicalCodeObfuscated.js", "utf8"));

JAVASCRIPT

After processing the obfuscated script with the babel plugin above, we get the following result:

Post-Deobfuscation Result

console.log("This always runs! 1");
console.log("This always runs! 2");
console.log("This always runs! 3");
console.log("This always runs! 4");
console.log("This always runs! 5");

JAVASCRIPT

And only the code which is designed to execute persists, a full restoration of the code!

Conclusion

Cleaning up dead/unreachable code is an essential component of the deobfuscation process. I would recommend doing it at least twice per program:

Firstly, at the start of the deobfuscation process. This will make the obfuscated script much more manageable to navigate, as you can focus on the important parts of the program instead of junk code
At the end of the deobfuscation process, as part of the clean-up stage. Simplifying obfuscated code tends to reveal more dead code that can be removed, and removing it at the end results in a cleaner final product.

If you’re interested, you can find the source code for all the examples in this repository.

Okay, that’s all I have for you today. I hope that this article helped you learn something new. Thanks for reading, and happy reversing!

复制粘贴，以免失效。