数组去重的各种方式对比-CSDN博客

数组去重，是一个老生常谈的问题了，在各厂的面试中也会有所提及，接下来就来细数一下各种数组去重的方式吧；

对于以下各种方式都统一命名为 unique，公用代码如下：

// Shared benchmark fixture: an array of 100000 random integers drawn from [0, 50000).
var arr = [];
for (var i = 0; i < 100000; i++) {
    arr.push(Math.floor(Math.random() * 50000));
}
// Placeholder for the implementation under test. A block comment is used here
// because a line comment would swallow the closing brace and break the snippet.
Array.prototype.unique = function() { /* algorithm goes here */ };
console.log(arr.unique());  // prints the de-duplicated array once an implementation is plugged in
复制代码

1、双重遍历

双重遍历的实现主要是通过两次遍历的对比，生成一个新的，不含重复数据的数组；

其实现方式有如下两种：

/*
 * Nested-scan approach, variant 1:
 * an element is appended to the result only when a strict-equality scan of
 * the elements collected so far finds no match. The first occurrence of each
 * value is the one that is kept.
 */
Array.prototype.unique = function() {
    if (!Array.isArray(this)) {
        throw new Error('Type Error: The target should be an Array!');
    }
    const collected = [];
    for (const candidate of this) {
        // Linear scan of the result built so far.
        const alreadyCollected = collected.some((kept) => kept === candidate);
        if (!alreadyCollected) {
            collected.push(candidate);
        }
    }
    return collected;
};
/*
 * Nested-scan approach, variant 2:
 * every element is compared (strict equality) with all elements after it; it
 * is kept only when no later element matches, so the LAST occurrence of each
 * value is the one that ends up in the result.
 */
Array.prototype.unique = function() {
    if (!Array.isArray(this)) {
        throw new Error('Type Error: The target should be an Array!');
    }
    const total = this.length;
    const result = [];
    scan: for (let head = 0; head < total; head++) {
        const candidate = this[head];
        for (let tail = head + 1; tail < total; tail++) {
            // A later duplicate exists: skip this occurrence.
            if (candidate === this[tail]) continue scan;
        }
        result.push(candidate);
    }
    return result;
};

// 实测耗时
// 方式一：2372 ms
// 方式二：4025 ms
复制代码

2、Array.prototype.indexOf()

通过 indexOf 方法查询值在数组中的索引，并通过对索引的判断来实现去重；

主要实现方式有下面两种：

/**
 * indexOf approach, variant 1: filter + indexOf.
 *
 * indexOf returns the position of the first occurrence of a value, so an
 * element is kept only when that position equals its own index.
 *
 * indexOf uses strict equality and therefore never finds NaN (it returns -1),
 * which would silently drop every NaN from the result. NaN is handled
 * separately so that its first occurrence is kept like any other value.
 *
 * @returns {Array} a new array holding the first occurrence of each value
 * @throws {Error} when invoked on a non-array receiver
 */
Array.prototype.unique = function() {
    if(!Array.isArray(this)) throw new Error('Type Error: The target should be an Array!');
    let nanKept = false;
    return this.filter((item, index, array) => {
        if (Number.isNaN(item)) {
            // Keep only the first NaN encountered.
            if (nanKept) return false;
            nanKept = true;
            return true;
        }
        return array.indexOf(item) === index;
    });
};
/**
 * indexOf approach, variant 2.
 * Folds over the array and looks each element up in the accumulated result;
 * an element is appended only when indexOf reports that it is not there yet.
 */
Array.prototype.unique = function() {
    if (!Array.isArray(this)) {
        throw new Error('Type Error: The target should be an Array!');
    }
    return this.reduce((distinct, value) => {
        const position = distinct.indexOf(value);
        if (position < 0) {
            distinct.push(value);
        }
        return distinct;
    }, []);
};

// 实测耗时
// 方式一：3972 ms
// 方式二：2650 ms
复制代码

3、Array.prototype.sort()

sort 方法可对数组进行排序，此时相同的值就会被排到一起，然后通过相邻元素比较就可知是否有相同值了；

举个栗子：

/**
 * sort approach, variant 1.
 *
 * Sorts a copy of the array so that equal values become adjacent, then walks
 * the sorted copy and keeps each element that differs from the one before it.
 *
 * The sort runs on a copy so the caller's array is left untouched. The first
 * element is always kept: comparing it with the non-existent index -1 would
 * yield undefined and wrongly drop a leading undefined value.
 *
 * The default sort order is used, so the result is ordered by the string
 * form of the values (e.g. 1, 10, 2).
 *
 * @returns {Array} a new, sorted array without adjacent duplicates
 * @throws {Error} when invoked on a non-array receiver
 */
Array.prototype.unique = function() {
    if(!Array.isArray(this)) throw new Error('Type Error: The target should be an Array!');
    var newArray = [];
    var sorted = this.slice().sort();
    for(var i = 0, length = sorted.length; i < length; i++) {
        if(i === 0 || sorted[i] !== sorted[i - 1]) newArray.push(sorted[i]);
    }
    return newArray;
};
/**
 * sort approach, variant 2.
 *
 * Sorts a copy of the array so that equal values become adjacent, then walks
 * the sorted copy and keeps each element that differs from the last element
 * already placed in the result.
 *
 * The sort runs on a copy so the caller's array is left untouched. While the
 * result is still empty the element is always kept: reading the "last"
 * element of an empty array yields undefined and would wrongly drop an
 * undefined value.
 *
 * The default sort order is used, so the result is ordered by the string
 * form of the values (e.g. 1, 10, 2).
 *
 * @returns {Array} a new, sorted array without adjacent duplicates
 * @throws {Error} when invoked on a non-array receiver
 */
Array.prototype.unique = function() {
    if(!Array.isArray(this)) throw new Error('Type Error: The target should be an Array!');
    var newArray = [];
    var sorted = this.slice().sort();
    for(var i = 0, length = sorted.length; i < length; i++) {
        if(newArray.length === 0 || sorted[i] !== newArray[newArray.length - 1]) newArray.push(sorted[i]);
    }
    return newArray;
};

// 实测耗时
// 方式一：105 ms
// 方式二：112 ms
复制代码

由于方式二在每次比较的时候需要重新计算一次 newArray.length 故会稍微比方式一慢一点；

3、Array.prototype.includes(searchElm, fromIndex)

该方法判断数组中是否存在指定元素

参数：

searchElm：需要查找的元素
fromIndex：开始查找的索引位置（若为负值，则从 array.length + fromIndex 位置开始查找）

返回值：

Boolean：数组中是否存在该元素

/**
 * includes approach.
 * Visits every element and asks the result array, via includes, whether the
 * value has been collected already; only values not yet present are appended.
 */
Array.prototype.unique = function() {
    if (!Array.isArray(this)) {
        throw new Error('Type Error: The target should be an Array!');
    }
    const distinct = [];
    this.forEach((value) => {
        // Already collected: nothing to do for this element.
        if (distinct.includes(value)) return;
        distinct.push(value);
    });
    return distinct;
};

// 实测耗时：2597 ms
复制代码

4、Array.prototype.reduce()

/**
 * reduce approach.
 *
 * Sorts a copy of the array so that equal values become adjacent, then folds
 * the sorted copy into a new array, appending an element whenever it differs
 * from the last element appended.
 *
 * The sort runs on a copy so the caller's array is left untouched. While the
 * accumulator is still empty the element is always appended: reading the
 * "last" element of an empty array yields undefined and would wrongly drop
 * an undefined value.
 *
 * The default sort order is used, so the result is ordered by the string
 * form of the values (e.g. 1, 10, 2).
 *
 * @returns {Array} a new, sorted array without adjacent duplicates
 * @throws {Error} when invoked on a non-array receiver
 */
Array.prototype.unique = function() {
    if(!Array.isArray(this)) throw new Error('Type Error: The target should be an Array!');
    return this.slice().sort().reduce(function(newArr, curr) {
        if(newArr.length === 0 || newArr[newArr.length - 1] !== curr) newArr.push(curr);
        return newArr;
    }, []);
};

// 实测耗时：127 ms
复制代码

5、对象的键值对

利用对象的 key 不能重复的特性来去重；之前看到有人使用对象的键值对去重的时候，直接将数组的每个值设置为对象的 key，value 都为1，每出现一个相同的值时就 value++，这样既可以去重，又可以知道对应的值出现的次数，例如：

// Naive illustration: every array value becomes a key of `obj` whose value is
// the number of times it was seen; a value is copied to `newArr` the first
// time it shows up.
var array = ['a', 'b', 'c', 'a', 'a', 'c'];
var newArr = [];
var obj = {};
for (const item of array) {
    if (!obj[item]) {
        // First sighting: start the counter and keep the value.
        obj[item] = 1;
        newArr.push(item);
        continue;
    }
    obj[item] += 1;
}
console.log(newArr); // ["a", "b", "c"]
console.log(obj);    // {a: 3, b: 1, c: 2}
复制代码

乍一看好像很完美，可是仔细一想，会发现存在以下几个问题：

若数组的值中出现了隐式类型转换成字符串后相等的值，则无法区分，例如 '1' 和 1；
若数组中的值有对象，写成 key 之后都是 [object Object]，无法区分；

解决方案：

若元素值的类型为 object，则通过 JSON.stringify 方法进行转换；

/**
 * Object-key approach: de-duplicates by recording one lookup key per element
 * on a dictionary object.
 *
 * The key is the element's typeof tag followed by a textual form of the
 * value, so 1 and '1' get different keys:
 *   - objects (including null and arrays) use JSON.stringify, so two
 *     structurally identical objects count as duplicates;
 *   - other primitives use String(value). JSON.stringify is not used for
 *     them because it turns NaN, Infinity and -Infinity all into "null",
 *     which would make those distinct numbers collide.
 * Functions and symbols have no JSON form (JSON.stringify yields undefined
 * for all of them), so they are de-duplicated by identity instead.
 *
 * JSON.stringify throws for objects containing circular references; that
 * error propagates to the caller.
 *
 * @returns {Array} a new array holding the first occurrence of each value
 * @throws {Error} when invoked on a non-array receiver
 */
Array.prototype.unique = function() {
    if(!Array.isArray(this)) throw new Error('Type Error: The target should be an Array!');
    const newArr = [];
    const seenKeys = Object.create(null); // no inherited keys to collide with
    const seenRefs = new Set();           // identity tracking for functions and symbols
    this.forEach(function(item) {
        const type = typeof item;
        if (type === 'function' || type === 'symbol') {
            if (!seenRefs.has(item)) {
                seenRefs.add(item);
                newArr.push(item);
            }
            return;
        }
        // Compute the key once instead of once for the test and once for the store.
        const key = type === 'object' ? type + JSON.stringify(item) : type + String(item);
        if (!seenKeys[key]) {
            seenKeys[key] = 1;
            newArr.push(item);
        }
    });
    return newArr;
};

// 实测耗时：142 ms
复制代码

6、Set

Set 对象的特性就是其中的值是唯一的，可利用该特性很便捷的处理数组去重的问题；

/**
 * Set approach, variant 1.
 * Feeds every element into a Set, which stores each value only once, and
 * turns the Set back into an array with Array.from.
 */
Array.prototype.unique = function() {
    if (!Array.isArray(this)) {
        throw new Error('Type Error: The target should be an Array!');
    }
    const distinct = new Set();
    for (const value of this) {
        distinct.add(value);
    }
    return Array.from(distinct);
};

// 实测耗时：45 ms

/**
 * Set approach, variant 2.
 * Builds a Set from the array, which stores each value only once, and
 * expands the Set into a new array literal with the spread operator.
 */
Array.prototype.unique = function() {
    if (!Array.isArray(this)) {
        throw new Error('Type Error: The target should be an Array!');
    }
    const distinct = new Set(this);
    return [...distinct];
};

// 实测耗时：65 ms
复制代码