Base64编码算法（Basic编码）之iharder算法源码解析

最新推荐文章于 2024-08-13 10:21:34 发布
lhever_
最新推荐文章于 2024-08-13 10:21:34 发布
阅读量1.2k
点赞数 1
分类专栏： Java 文章标签： base64 iharder
本文链接：https://blog.csdn.net/nmgrd/article/details/77990615
版权
Java 专栏收录该内容
22 篇文章 0 订阅
订阅专栏


import java.io.UnsupportedEncodingException;

/**
 * Base64 codec using the standard alphabet ({@code A-Z a-z 0-9 + /}) with
 * {@code =} padding.
 *
 * <p>This is a trimmed-down variant of Robert Harder's implementation: only the
 * core encode/decode methods are kept, and the decode table is generated from
 * the alphabet instead of being written out by hand.
 *
 * <p>Strings are converted to and from bytes as UTF-8. The class holds no
 * mutable state after class initialization.
 *
 * @author Robert Harder
 * @author rob@iharder.net
 * @version 2.1
 */
public class Base64
{

    /** Not instantiable: the class only offers static helpers. */
    private Base64() {}


/* ********  P R I V A T E   F I E L D S  ******** */


    /** The equals sign (=) as a byte; it is the padding character. */
    private final static byte EQUALS_SIGN = (byte)'=';

    /** Decode-table marker for a byte that is not part of the Base64 alphabet. */
    private final static byte OTHER_BYTE = -1;

    /** Decode-table marker for the padding character; distinct from every 6-bit value. */
    private final static byte EQUALS_SIGN_ENC = -2;

    /** Charset used for every String/byte conversion in this class. */
    private final static java.nio.charset.Charset ENCODING = java.nio.charset.StandardCharsets.UTF_8;

    /** The 64 valid Base64 characters, indexed by their 6-bit value. */
    private final static byte[] ALPHABET =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".getBytes( ENCODING );

    /**
     * Maps every possible unsigned byte value (0-255) to its 6-bit Base64 value,
     * to {@link #EQUALS_SIGN_ENC} for the padding character, or to
     * {@link #OTHER_BYTE} for anything else. Covering the full byte range means a
     * lookup can never run past the end of the table.
     */
    private final static byte[] DECODABET = new byte[256];

    /** Builds the decode table as the inverse of {@link #ALPHABET}. */
    static
    {
        for( int i = 0; i < DECODABET.length; i++ )
        {
            DECODABET[i] = OTHER_BYTE;
        }
        for( int i = 0; i < ALPHABET.length; i++ )
        {
            DECODABET[ ALPHABET[i] ] = (byte) i;
        }
        DECODABET[ EQUALS_SIGN ] = EQUALS_SIGN_ENC;
    }


/* ********  E N C O D I N G   M E T H O D S  ******** */

    /**
     * Encodes up to three source bytes into exactly four Base64 bytes.
     *
     * <p>{@code numSigBytes} is the number of significant bytes to read starting
     * at {@code source[srcOffset]}:
     * <ul>
     *   <li>3 - three bytes become four alphabet characters;</li>
     *   <li>2 - two bytes become three alphabet characters plus one {@code =};</li>
     *   <li>1 - one byte becomes two alphabet characters plus two {@code =}.</li>
     * </ul>
     * The four output bytes are stored in {@code destination[destOffset]} through
     * {@code destination[destOffset + 3]}. For any other value of
     * {@code numSigBytes} nothing is written.
     *
     * @param source      array holding the raw bytes
     * @param srcOffset   index of the first byte to encode
     * @param numSigBytes number of significant bytes (1, 2 or 3)
     * @param destination array receiving the encoded bytes
     * @param destOffset  index at which the four output bytes are written
     * @return the {@code destination} array
     */
    private static byte[] encode3to4(
            byte[] source, int srcOffset, int numSigBytes,
            byte[] destination, int destOffset)
    {
        //           1         2         3
        // 01234567890123456789012345678901 Bit position
        // --------000000001111111122222222 Array position from threeBytes
        // --------|    ||    ||    ||    | Six bit groups to index ALPHABET
        //          >>18  >>12  >> 6  >> 0  Right shift necessary
        //                0x3f  0x3f  0x3f  Additional AND

        // Pack the significant bytes into the low 24 bits, zero-padding missing
        // ones. The 0xFF mask strips the sign extension a negative byte would
        // otherwise bring along when widened to int.
        int inBuff = ( numSigBytes > 0 ? ( ( source[srcOffset]     & 0xFF ) << 16 ) : 0 )
                   | ( numSigBytes > 1 ? ( ( source[srcOffset + 1] & 0xFF ) <<  8 ) : 0 )
                   | ( numSigBytes > 2 ? (   source[srcOffset + 2] & 0xFF )         : 0 );

        switch( numSigBytes )
        {
            case 3:
                destination[destOffset]     = ALPHABET[ inBuff >>> 18 ];
                destination[destOffset + 1] = ALPHABET[ ( inBuff >>> 12 ) & 0x3f ];
                destination[destOffset + 2] = ALPHABET[ ( inBuff >>> 6 ) & 0x3f ];
                destination[destOffset + 3] = ALPHABET[ inBuff & 0x3f ];
                return destination;

            case 2:
                destination[destOffset]     = ALPHABET[ inBuff >>> 18 ];
                destination[destOffset + 1] = ALPHABET[ ( inBuff >>> 12 ) & 0x3f ];
                destination[destOffset + 2] = ALPHABET[ ( inBuff >>> 6 ) & 0x3f ];
                destination[destOffset + 3] = EQUALS_SIGN;
                return destination;

            case 1:
                destination[destOffset]     = ALPHABET[ inBuff >>> 18 ];
                destination[destOffset + 1] = ALPHABET[ ( inBuff >>> 12 ) & 0x3f ];
                destination[destOffset + 2] = EQUALS_SIGN;
                destination[destOffset + 3] = EQUALS_SIGN;
                return destination;

            default:
                return destination;
        }
    }

    /**
     * Verifies that {@code [off, off + len)} lies inside {@code source}.
     *
     * @throws ArrayIndexOutOfBoundsException if {@code off} or {@code len} is
     *         negative, or the range extends past the end of the array
     * @throws NullPointerException if {@code source} is {@code null}
     */
    private static void checkRange( byte[] source, int off, int len )
    {
        // Written as a subtraction so that off + len cannot overflow.
        if( off < 0 || len < 0 || off > source.length - len )
        {
            throw new ArrayIndexOutOfBoundsException(
                    "Invalid range: off=" + off + ", len=" + len + ", array length=" + source.length );
        }
    }

    /**
     * Encodes {@code len} bytes of {@code source}, starting at {@code off}, as a
     * padded Base64 string.
     *
     * @param source the bytes to encode
     * @param off    index of the first byte to encode
     * @param len    number of bytes to encode
     * @return the Base64 text; empty when {@code len} is 0
     * @throws ArrayIndexOutOfBoundsException if the range is not inside {@code source}
     * @throws IllegalArgumentException if the encoded form would not fit in an array
     * @throws NullPointerException if {@code source} is {@code null}
     */
    public static String encodeBytes( byte[] source, int off, int len )
    {
        checkRange( source, off, len );

        // Every started group of three input bytes produces four output bytes.
        // Computed in long so that large inputs do not overflow int.
        long encodedLen = ( (long) len + 2 ) / 3 * 4;
        if( encodedLen > Integer.MAX_VALUE )
        {
            throw new IllegalArgumentException( "Input too large to encode: " + len + " bytes" );
        }

        byte[] outBuff = new byte[ (int) encodedLen ];
        int end = off + len; // cannot overflow: checkRange guarantees end <= source.length
        int src = off;
        int dst = 0;

        // Full three-byte groups.
        while( end - src >= 3 )
        {
            encode3to4( source, src, 3, outBuff, dst );
            src += 3;
            dst += 4;
        }

        // Trailing one or two bytes, padded with '='.
        if( src < end )
        {
            encode3to4( source, src, end - src, outBuff, dst );
        }

        return new String( outBuff, ENCODING );
    }


    /**
     * Encodes the whole array as a padded Base64 string.
     *
     * @param source the bytes to encode
     * @return the Base64 text
     * @throws NullPointerException if {@code source} is {@code null}
     */
    public static String encodeBytes( byte[] source )
    {
        return encodeBytes( source, 0, source.length );
    }

    /**
     * Encodes the UTF-8 bytes of {@code s} as a padded Base64 string.
     *
     * @param s the text to encode
     * @return the Base64 text
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public static String encode(String s)
    {
        return encodeBytes( s.getBytes( ENCODING ) );
    }


/* ********  D E C O D I N G   M E T H O D S  ******** */

    /**
     * Decodes one group of four Base64 bytes into one, two or three raw bytes.
     *
     * <p>Accepted shapes are {@code xx==} (one byte), {@code xxx=} (two bytes)
     * and {@code xxxx} (three bytes), where {@code x} is an alphabet character.
     *
     * @param source      array holding the four Base64 bytes
     * @param srcOffset   index of the first of the four bytes
     * @param destination array receiving the decoded bytes
     * @param destOffset  index at which decoded bytes are written
     * @return the number of bytes written (1, 2 or 3), or -1 if the group is
     *         malformed; nothing is written in that case
     */
    private static int decode4to3( byte[] source, int srcOffset, byte[] destination, int destOffset )
    {
        int v0 = DECODABET[ source[ srcOffset     ] & 0xFF ];
        int v1 = DECODABET[ source[ srcOffset + 1 ] & 0xFF ];
        int v2 = DECODABET[ source[ srcOffset + 2 ] & 0xFF ];
        int v3 = DECODABET[ source[ srcOffset + 3 ] & 0xFF ];

        // The first two positions must always carry data; padding or an
        // unknown byte here cannot be decoded.
        if( v0 < 0 || v1 < 0 )
        {
            return -1;
        }

        int outBuff = ( v0 << 18 ) | ( v1 << 12 );

        // Example: Dk==
        if( v2 == EQUALS_SIGN_ENC )
        {
            if( v3 != EQUALS_SIGN_ENC )
            {
                return -1; // data after padding inside a group
            }
            destination[ destOffset ] = (byte)( outBuff >>> 16 );
            return 1;
        }
        if( v2 < 0 )
        {
            return -1;
        }
        outBuff |= v2 << 6;

        // Example: DkL=
        if( v3 == EQUALS_SIGN_ENC )
        {
            destination[ destOffset     ] = (byte)( outBuff >>> 16 );
            destination[ destOffset + 1 ] = (byte)( outBuff >>>  8 );
            return 2;
        }
        if( v3 < 0 )
        {
            return -1;
        }
        outBuff |= v3;

        // Example: DkLE
        destination[ destOffset     ] = (byte)( outBuff >>> 16 );
        destination[ destOffset + 1 ] = (byte)( outBuff >>>  8 );
        destination[ destOffset + 2 ] = (byte)( outBuff        );
        return 3;
    }


    /**
     * Decodes {@code len} Base64 bytes of {@code source}, starting at {@code off}.
     *
     * <p>Decoding stops after the first group that ends in padding. A trailing
     * group of fewer than four bytes is ignored. Any byte outside the Base64
     * alphabet (including whitespace and bytes with the high bit set) and any
     * misplaced padding character makes the whole input invalid.
     *
     * @param source the Base64 bytes
     * @param off    index of the first byte to decode
     * @param len    number of bytes to decode
     * @return the decoded bytes, or {@code null} if the input is invalid (a
     *         diagnostic line is written to {@code System.err} in that case)
     * @throws ArrayIndexOutOfBoundsException if the range is not inside {@code source}
     * @throws NullPointerException if {@code source} is {@code null}
     */
    public static byte[] decode( byte[] source, int off, int len )
    {
        checkRange( source, off, len );

        // Four encoded bytes yield at most three decoded bytes, so this is an
        // upper bound on the output size. Computed in long to avoid overflow.
        byte[] outBuff = new byte[ (int)( (long) len * 3 / 4 ) ];
        int    outBuffPosn = 0;

        byte[] b4     = new byte[4]; // the group currently being collected
        int    b4Posn = 0;
        int    end    = off + len;   // cannot overflow: checkRange guarantees end <= source.length

        for( int i = off; i < end; i++ )
        {
            byte raw = source[i];

            if( DECODABET[ raw & 0xFF ] == OTHER_BYTE )
            {
                System.err.println( "Bad Base64 input character at " + i + ": " + source[i] + "(decimal)" );
                return null;
            }

            b4[ b4Posn++ ] = raw;
            if( b4Posn > 3 )
            {
                int produced = decode4to3( b4, 0, outBuff, outBuffPosn );
                if( produced < 0 )
                {
                    System.err.println( "Bad Base64 padding in the group ending at " + i );
                    return null;
                }
                outBuffPosn += produced;
                b4Posn = 0;

                // A group that ends in padding is the last one.
                if( raw == EQUALS_SIGN )
                {
                    break;
                }
            }
        }

        byte[] out = new byte[ outBuffPosn ];
        System.arraycopy( outBuff, 0, out, 0, outBuffPosn );
        return out;
    }

    /**
     * Decodes Base64 text.
     *
     * @param s the Base64 text
     * @return the decoded bytes, or {@code null} if {@code s} is not valid Base64
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public static byte[] decode(String s)
    {
        byte[] bytes = s.getBytes( ENCODING );
        return decode( bytes, 0, bytes.length );
    }


/* ********  TEST   M E T H O D S  ******** */

    /**
     * Small demonstration: encodes a sample string, prints the Base64 text,
     * decodes it again and prints the result.
     *
     * @param args ignored
     * @throws UnsupportedEncodingException never thrown by the current body; the
     *         clause is kept so the method signature stays unchanged
     */
    public static void main(String... args) throws UnsupportedEncodingException {

        String str = "我是中国人!@#$%^&*()_+S【】；0987655.、，；‘、】【屏";
        String enstr = encode(str);
        System.out.println(enstr);
        byte[] debytes = decode(enstr);
        String destr = new String( debytes, 0, debytes.length, ENCODING );
        System.out.println(destr);
    }

}