算法:离我们也没有那么遥远

最近在学MIT的《算法导论》,感觉算法离我们没有想象的那么遥远:http://v.163.com/special/opencourse/algorithms.html


算法在什么层次上影响执行速度,或者在什么条件下才需要关注算法。

我始终坚信,当系统复杂度达到一定程度,譬如操作系统,就不仅仅是算法的问题,系统本身的复杂性也足以优先于算法来考虑。不过《算法导论》第一课说的也没错,算法是基础,不可忽略。所以就将课中提到的算法用c++写了出来,并且实际运行对比--嗯,发现收获不小。

应该要提升算法的地位,我的认识应该改成:当系统复杂度达到一定程度,整体应该被优先考虑,但算法也不可忽略。

当然和机器相关,考虑普通PC(约执行3亿行代码/秒),当数据达到万级别时的排序就需要考虑选择更高级的算法了。

以int数组排序为例,对比插入排序和合并排序:1秒钟内,插入排序能排长度为4万的整型数组,合并排序则能支持长度为400万的数组。

[winlin@localhost algorithm]$ g++ -g -O0 merge_sort.cpp -o merge_sort; time ./merge_sort
real    0m0.671s
user    0m0.500s
sys     0m0.055s

[winlin@localhost algorithm]$ g++ -g -O0 merge_sort.cpp -o merge_sort; time ./merge_sort
real    0m0.925s
user    0m0.689s
sys     0m0.070s

插入排序:

// Sorts the first `size` ints of `data` in place, in ascending order, using
// insertion sort. The pseudo-code of the algorithm is written to stdout on
// every call. A `size` of 0 or 1 leaves the array untouched.
void insert_sort(int* data, int size){
    cout << "insert sort algorithm" << endl;
    cout << "InsertSort A[1...n]" << endl;
    cout << "  for i <- 2 to n" << endl;
    cout << "    let key <- A[i]" << endl;
    cout << "    for j <- i-1 to 1 and A[j] > key" << endl;
    cout << "      A[j+1] <- A[j]" << endl;
    cout << "    A[j+1] <- key" << endl;

    // data[0 .. sorted-1] is already ordered; insert data[sorted] into it.
    for(int sorted = 1; sorted < size; ++sorted){
        const int pending = data[sorted];

        // Slide every larger element one slot to the right, tracking the
        // position of the free slot ("hole") as it moves left.
        int hole = sorted;
        while(hole > 0 && data[hole - 1] > pending){
            data[hole] = data[hole - 1];
            --hole;
        }

        data[hole] = pending;
    }
}

合并排序算法:

// Sorts the first `size` ints of `data` in place, in ascending order, using
// top-down merge sort. Equal elements keep their relative order.
//
// The pseudo-code of the algorithm is written to stdout once, on the first
// call in the process. Each call allocates a temporary buffer of `size` ints
// for the merge step and frees it before returning.
//
// `size` values of 0 or 1 (or negative) return immediately without touching
// `data`.
void merge_sort(int* data, int size){
    static bool first = true;
    if(first){
        first = false;
        cout << "merge sort algorithm" << endl
            << "MergeSort A[1...n]" << endl
            << "  1. if n == 1, done." << endl
            << "  2. MergeSort(1...[n/2]])" << endl
            << "     MergeSort([n/2]+1...n)" << endl
            << "  3. merge 2 sorted list" << endl;
    }
    
    // Nothing to sort. Using <= rather than == matters: an empty range would
    // otherwise split into two empty halves and recurse without end.
    if(size <= 1){
        return;
    }
    
    // Split into a left half and a right half and sort each recursively.
    int* part1 = data;
    int part1_len = size / 2;
    int* part2 = data + part1_len;
    int part2_len = size - part1_len;
    
    merge_sort(part1, part1_len);
    merge_sort(part2, part2_len);
    
    // Merge the two sorted halves into a scratch buffer.
    int* temp = new int[size];
    int* p = temp;
    for(int i = 0; i < size; i++){
        // Take from the right half only when the left is exhausted or the
        // right head is strictly smaller; ties go to the left half.
        if(part1_len == 0 || (part2_len > 0 && *part2 < *part1)){
            *p++ = *part2++;
            part2_len --;
        }
        else{
            *p++ = *part1++;
            part1_len --;
        }
    }
    
    memcpy(data, temp, size * sizeof(int));
    delete[] temp;
}

binary search:

// Looks for `elem` in the first `size` ints of `data`, which must be sorted
// in ascending order. Returns the index of a matching element, or -1 when
// there is none. With duplicates, the index returned is whichever match the
// midpoint probing reaches first.
//
// The pseudo-code of the algorithm is written to stdout once, on the first
// call in the process.
int binary_search(int* data, int size, int elem){
    static bool banner_pending = true;
    if(banner_pending){
        banner_pending = false;
        cout << "binary search algorithm" << endl;
        cout << "BinarySearch A[1...n] find elem" << endl;
        cout << "  1. if n == 0, return -1, done." << endl;
        cout << "  2. if A[[n/2] == elem:" << endl;
        cout << "        return [n/2]" << endl;
        cout << "     else if A[[n/2] > elem:" << endl;
        cout << "        BinarySearch(1...[n/2]])" << endl;
        cout << "     else if A[[n/2] < elem:" << endl;
        cout << "        BinarySearch([n/2]+1...n)" << endl;
        cout << "  3. combile: do nothing." << endl;
    }

    // The current search window is data[base .. base + remaining - 1].
    int base = 0;
    int remaining = size;
    while(remaining != 0){
        const int half = remaining / 2;
        const int probe = data[base + half];

        if(probe == elem){
            return base + half;
        }

        if(probe > elem){
            // Keep only the elements to the left of the probe.
            remaining = half;
        }
        else{
            // Keep only the elements to the right of the probe.
            base += half + 1;
            remaining -= half + 1;
        }
    }

    return -1;
}

考虑乘方算法:x^n = x * x * x .... * x

用普通算法计算:

    // Naive exponentiation: computes x^n by multiplying an accumulator by x
    // once per loop iteration, i.e. n multiplications in total.
    double x = 1.00000001;
    int n = 0x7FFFFFFF;  // 2147483647, the largest 32-bit signed value
    double ret = 1;      // running product, starts at x^0
    for(int i = 0; i < n; i++){
        ret = x * ret;
    }
T(n) = n

由于计算量巨大,所花时间为:
real    0m7.708s
user    0m4.737s
sys     0m0.876s

若使用分治法处理:

// Computes x raised to the integer power n by repeated squaring, using a
// number of multiplications proportional to log2(|n|).
//
//   x  base
//   n  exponent; may be zero or negative
//
// Returns x^n. n == 0 yields 1 for every x. A negative n yields the
// reciprocal of the corresponding positive power (so x == 0 with n < 0
// produces a division by zero in floating point).
//
// The pseudo-code of the algorithm is written to stdout once, on the first
// call in the process.
double native_xn(double x, int n){
    static bool first = true;
    if(first){
        first = false;
        cout << "calc the x^n" << endl
            << "native_xn x^n" << endl
            << "  1. if n == 0, return 1, done." << endl
            << "  2. if n is odd:" << endl
            << "        return x * native_xn((n-1)/2) * native_xn((n-1)/2)" << endl
            << "     else:" << endl
            << "        return native_xn(n/2) * native_xn(n/2);" << endl
            << "  3. combine: nothing" << endl;
    }
    
    if(n == 0){
        return 1;
    }
    
    if(n < 0){
        // x^n == 1 / (x * x^(-(n+1))). Negating n+1 instead of n keeps the
        // exponent representable even when n is the most negative int.
        // Without this branch, n % 2 is never 1 for negative n and the
        // recursion collapses to a result of 1.
        return 1.0 / (x * native_xn(x, -(n + 1)));
    }
    
    if((n % 2) == 1){
        // Odd exponent: x^n = x * (x^((n-1)/2))^2.
        double r = native_xn(x, (n -1 ) /2);
        return x * r * r;
    }
    else{
        // Even exponent: x^n = (x^(n/2))^2. The half power is computed once
        // and squared, which is what makes the recursion logarithmic.
        double r = native_xn(x, n /2);
        return r * r;
    }
}
T(n) = lg(n)

所花时间是:

real    0m0.026s
user    0m0.007s
sys     0m0.015s

这个区别就很大了。


再看斐波那契数列的计算。

F(n)={

0, if n=0

1, if n=1

F(n-1)+F(n-2) if n>=2

}

所以递归是最直观的解法:

// Computes the n-th Fibonacci number by direct recursion on the definition
// F(n) = F(n-1) + F(n-2). Any n below 2 is returned unchanged, which gives
// F(0) = 0 and F(1) = 1.
//
// Every call with n >= 2 makes two further recursive calls, so the same
// sub-results are recomputed many times.
//
// The pseudo-code of the algorithm is written to stdout once, on the first
// call in the process.
double fibonacci_native(int n){
    static bool banner_pending = true;
    if(banner_pending){
        banner_pending = false;
        cout << "calc the fibonacci(n)" << endl;
        cout << "fibonacci_native n" << endl;
        cout << "  1. if n == 0 or 1, return n, done." << endl;
        cout << "  2. return fibonacci_native(n -1) + fibonacci_native(n - 2)" << endl;
        cout << "  3. combine: nothing" << endl;
    }

    if(n >= 2){
        const double one_back = fibonacci_native(n - 1);
        const double two_back = fibonacci_native(n - 2);
        return one_back + two_back;
    }

    // Base case: n is returned as-is.
    return n;
}
当然运行时间昂贵:

n=43时:

fibonacci(43)=433494437.000000

real    0m9.343s
user    0m4.815s
sys     0m0.838s

T(n)=x^n,即指数级增长,因为同样的子问题被重复计算了很多次:算F(n)时需要算F(n-1)和F(n-2),而算F(n-1)时又需要计算F(n-3)并重新计算一遍F(n-2)。

用cache的方法,可以将重复计算去掉,从而达到T(n)=n,即从底部开始算起。

// Computes the n-th Fibonacci number bottom-up, keeping only the two most
// recent values, so the work is one addition per step from 2 up to n.
//
//   n  index into the sequence F(0) = 0, F(1) = 1, F(n) = F(n-1) + F(n-2)
//
// Returns F(n) as a double. Any n <= 0 yields 0.
//
// The pseudo-code of the algorithm is written to stdout once, on the first
// call in the process.
double fibonacci_cache(int n){
    static bool first = true;
    if(first){
        first = false;
        cout << "calc the fibonacci(n)" << endl
            << "fibonacci_cache n" << endl
            << "  1. build cache: n2=F(n-2), n1=F(n-1)." << endl
            << "  2. n2=0,n1=1,ret=0" << endl
            << "  3. for [2...N]" << endl
            << "         ret = n1+n2;"<<endl
            << "         n2 = n1;"<<endl
            << "         n1 = ret;"<<endl;
    }
    
    // F(1) has to be answered directly: the loop below starts at 2, so for
    // n == 1 it never runs and `ret` would still hold its initial 0.
    if(n == 1){
        return 1;
    }
    
    double cache_pre_n2 = 0;   // F(i-2), starts as F(0)
    double cache_pre_n1 = 1;   // F(i-1), starts as F(1)
    double ret = 0;
    for(int i = 2; i <= n; i++){
        ret = cache_pre_n1 + cache_pre_n2;
        cache_pre_n2 = cache_pre_n1;
        cache_pre_n1 = ret;
    }
    
    return ret;
}

当n=1024时也不用什么时间:

fibonacci(1024)=4506699633677816191404865591201603611210057765586363088692424961083421629061324540306009631764407814868917761514659447075449365476418924571096193010086458680628417980162101749952294888691146652624641609216913571840.000000
real    0m0.017s
user    0m0.004s
sys     0m0.009s

这个对比很明显呐~

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

winlinvip

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值