Java parallel Bucket Sort v 3.0

本文探讨了对3亿个随机整数进行排序的Java并行桶排序优化,尽管内存需求约为18GB,但通过4个线程实现了约1.6倍的速度提升。作者建议在更大数据规模(如100亿)下,平行计算性能更优,推荐使用C或C++进行平行计算研究。此外,桶排序适用于整数且数据范围有限的情况,是一种以空间换取时间的排序算法,对整数数组的访问速度是其效率的关键。
摘要由CSDN通过智能技术生成

除了求最值没有优化,其他做了一点优化,主要是内存的使用,但是目前跑起来还是需要 18 gigs 内存左右,因为需要对 三亿个随机整数进行排序,开了4个线程,动态开辟线程会更慢.

linear_time: 14.669 secs
parallel_time: 9.279 secs
speed up : 1.5808815605129865
最好情况可以加速一倍,不理想!也许12个线程全开会好一些.个人感觉java不是为了高性能计算而生的,更适合做互联网应用. 个人建议不要使用线程池,会更慢!因为我们都是一锤子买卖,只是需要得到实验结果,没有百万用户每日每夜的访问。

4亿个整数:
linear_time: 21.917 secs
parallel_time: 10.672 secs
speed up : 2.053691904047976
当然了,可能是因为 cpu 线性计算数字运算非常快,显得平行计算有点鸡肋. 但是,当数据规模达到100亿,平行计算的性能就会很明显,不过这种情况并不多见。建议研究平行计算 使用 c or cpp ,而不是java。建议各位读者研究数据结构的话,样本一个亿起步,否则样本太小意义不大,比如冒泡排序,我觉得没啥实际意义。
注意一点关于桶排序,整数数据范围,这个非常重要,待排序整数必须有一个范围,而且必须得是整数,小数好像不能使用桶排序。因为,桶排序依靠的就是数组索引值映射待排序数字!你最后得到的数据,根本不是原来你输入 的数据,而是他们在数组上的索引映射,也就是说,你最后得到的是数组索引值!有点类似量子力学那种。桶排序对内存消耗极大,典型的用空间换时间!比如:你的最大待排整数最大为:100000000,那么你必须有个这么长的数组!数值就是索引值,索引值就是数值!它之所以快,因为,我觉得是:对整数数组的访问超快!你给一个索引,闪电般找到这个索引对应的值!
int [] my_array= new int[100000000];
my_array[100000000-1]= super fast…

public class MaxMinTool {
    public static int max_item(int original,int fresh_one){
         return fresh_one>original?fresh_one:original;
    }

    public static int max_item(int one,int two,int three,int four){
        return max_item(max_item(one,two),max_item(three,four));
    }
}
import java.util.List;

public class PrintTool {
    public static void print_array(List list)
    {
        int counter = 0;
        for (int i = 0; i < list.size(); i++) {

            int[] array = (int[]) list.get(i);

            for (int j = 0; j < array.length; j++) {


                System.out.println(array[j]);
                ++counter;
            }

        }

        System.out.println("List counter:"+counter);
    }

    public static void print_array(int[] array){
        for (int j = 0; j < array.length; j++) {

            System.out.print(array[j]+"-");

        }
    }
}
import java.util.Random;

public class RandomGenerator {

    public static int[] random_generate(int min,int max,int numtrials){

        int[] int_array = new int[numtrials];

        Random random = new Random();

        for (int i = 0; i < numtrials; i++) {
            int random_num = random.nextInt(max)%(max-min+1) + min;
            int_array[i] = random_num;
        }

        return int_array;
    }

}
public class BucketSort {

    public int max;

    public int[] linear_bucket_sort(int[] original_array){
        for (int i = 0; i < original_array.length; i++) {
          max = MaxMinTool.max_item(max,original_array[i]);
        }

       return bucket_sort(original_array,max);

    }

    public int[] bucket_sort(int[] original_array,int max){

            // index: 0-99 but max is : 100 . the point is the array index for bucketsort
           int[] store_array = new int[max+1];

            for (int i = 0; i < original_array.length; i++) {
                int int_num = original_array[i];

                // here is the miracle of bucketsort
                store_array[int_num]= ++store_array[int_num];
            }

           int[] final_sorted_array = new int[original_array.length];
           int cherry_pointer = 0;

        for (int cherry = 0; cherry < store_array.length; cherry++) {

            int cherry_num = store_array[cherry];
            for (int j = 0; j < cherry_num; j++) {
                final_sorted_array[cherry_pointer]=cherry;
                ++cherry_pointer;
            }

        }

           return final_sorted_array;
    }
}
public class GoToBucketThread extends Thread{

    public int begin_index;
    public int[] random_array;
    public int bucket_size;
    public int cpu_num;

    public int[] arr0;
    public int pointer0 = 0;

    public int[] arr1;
    public int pointer1 = 0;

    public int[] arr2;
    public int pointer2 = 0;

    public int[] arr3;
    public int pointer3 = 0;

    public int max0;
    public int max1;
    public int max2;
    public int max3;

    public GoToBucketThread(int begin_index, int[] random_array, int bucket_size,int cpu_num) {
        this.begin_index = begin_index;
        this.random_array = random_array;
        this.bucket_size = bucket_size;
        this.cpu_num = cpu_num;

        arr0 = new int[bucket_size];
        arr1 = new int[bucket_size];
        arr2 = new int[bucket_size];
        arr3 = new int[bucket_size];

    }

    @Override
    public void run() {
        classify_data();
        arr0 = wash_zero_data(arr0,pointer0);
        arr1 = wash_zero_data(arr1,pointer1);
        arr2 = wash_zero_data(arr2,pointer2);
        arr3 = wash_zero_data(arr3,pointer3);
    }

    public int[] wash_zero_data(int[] original_array,int array_pointer){


        int[] new_array = new int[array_pointer];

        for (int i = 0; i < array_pointer; i++) {
            new_array[i] = original_array[i];
        }
        return new_array;

    }



    public void classify_data(){

        // range random number in different areas.... 1-25,26-50,51-75,76-100
        for (int i = begin_index; i < random_array.length; i+=cpu_num) {

            int bucket_num = random_array[i]/(bucket_size);

            bucket_num = bucket_num>(cpu_num-1)?(cpu_num-1):bucket_num;

            switch (bucket_num)
            {
                case 0: arr0[pointer0] = random_array[i];
                        ++pointer0;
                        break;

                case 1: arr1[pointer1] = random_array[i];
                        ++pointer1;
                        break;

                case 2: arr2[pointer2] = random_array[i];
                        ++pointer2;
                         break;

                case 3: arr3[pointer3] = random_array[i];
                        ++pointer3;
                        break;
            }

        }  // all cherries are in the right area
    }
}
public class SmallBucketThread extends Thread{

    public int[] original_array;
    public int[] final_sorted_array;
    public int max_num;
    public int min_num;

    public SmallBucketThread(int[] original_array,int min_num,int max_num) {
        this.original_array = original_array;
        final_sorted_array = new int[original_array.length];
        this.max_num = max_num;
        this.min_num = min_num;
    }

    @Override
    public void run() {
        bucket_sort_parallel(original_array,max_num);
    }



    public void bucket_sort_parallel(int[] original_array,int max){

        // index: 0-99 but max is : 100 . the point is the array index for bucketsort
        int[] store_array = new int[max+1];

        for (int i = 0; i < original_array.length; i++) {
                 int int_num = original_array[i];
                // here is the miracle of bucketsort,index is the value,and the value is the index as well
                store_array[int_num]= ++store_array[int_num];

        }

        int cherry_pointer = 0;


            // loop the cherry store....
            for (int cherry = min_num; cherry < max_num; cherry++) {

                int cherry_num = store_array[cherry];
                // just pick up cherry at least one
                if (cherry_num!=0){
                    for (int j = 0; j < cherry_num; j++) {
                        final_sorted_array[cherry_pointer] = cherry;
                           // pointer is at the end
                        if (cherry_pointer==original_array.length-1)
                            break;
                        ++cherry_pointer;
                    }
                }

            }

    }
}
import java.util.Random;

public class RandomGenerator {

    public static int[] random_generate(int min,int max,int numtrials){

        int[] int_array = new int[numtrials];

        Random random = new Random();

        for (int i = 0; i < numtrials; i++) {
            int random_num = random.nextInt(max)%(max-min+1) + min;
            int_array[i] = random_num;
        }

        return int_array;
    }

}
public class MyTimer {
    private static long begin;

    public static void time_start(){
        begin = System.nanoTime();
    }

    public static double time_end_milli_seconds(){
        return System.nanoTime()-begin/1e6;

    }

    public static double time_end_seconds(){
        double total_time =  (System.nanoTime()-begin)/1e6;
        return ((double)((int)total_time))/1000;

    }

}
import java.util.List;

public class TestBucketSort {

    public void print_array(List list)
    {
        for (int i = 0; i < list.size(); i++) {

            int[] array = (int[]) list.get(i);

            for (int j = 0; j < array.length; j++) {

                System.out.println(array[j]);

            }

        }
    }

    public void print_array(int[] array){
        for (int j = 0; j < array.length; j++) {

            System.out.print(array[j]+"-");

        }
    }

    public double linear_bucketsort(int min,int max,int num_trails,int[] random_array){

        MyTimer.time_start();

        BucketSort bucketSort = new BucketSort();

        int[] sorted_array = bucketSort.linear_bucket_sort(random_array);

        double time_end = MyTimer.time_end_seconds();

        return time_end;
    }

    public static void main(String[] args) throws InterruptedException {

        int test_num = 300000000;

        int min = 1;
        int max = test_num; // 2 billion
        int num_trails = test_num; // 100 million

        int[] random_array = RandomGenerator.random_generate(min,max,num_trails);
      //  PrintTool.print_array(random_array);

        TestBucketSort testBucketSort = new TestBucketSort();
        double linear_time =  testBucketSort.linear_bucketsort(min,max,num_trails,random_array);

        BucketSortParallel parallel = new BucketSortParallel();
        double parallel_time = parallel.bucket_sort_parallel(min,max,num_trails,random_array);

       // System.out.println("parallel result: ");
      //   PrintTool.print_array(parallel.sorted_list);

       System.out.println("linear_time: "+linear_time+" secs");
       System.out.println("parallel_time: "+parallel_time+" secs");
       System.out.println("speed up : "+ (linear_time/parallel_time));


    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值