CDC(Content-Defined Chunking,基于内容的分块)是一种适用于多种应用环境的重复数据删除算法。本文用 Hadoop 将该算法并行化,但并没有真正删除重复数据,只是检测出两个文件中的重复部分。


操作系统:Ubuntu 12.04




import java.io.DataInput;
import java.io.DataOutput;
import java.io.EOFException;
import java.io.IOException;
import org.apache.hadoop.io.Writable;

public class ChunkInfo implements Writable {
	public int chunk_id; // 子块的id
	public int chunk_size; // 子块的大小
	public int chunk_filenum; // 该子块所属的文件数。
	public int chunk_num; // 该子块在所有文件中出现的总次数。
	public byte blockBytes[] = null; // 存放子块的字节。
	public String chunk_filename; // 子块的文件名
	public String hashValue; // 块的hash值,通常是md5值。

	public ChunkInfo() {
		chunk_id = 0;
		chunk_size = 8 * 1024;
		chunk_filename = "4321";
		chunk_filenum = 1;
		chunk_num = 1;
		hashValue = " ";
		blockBytes = new byte[chunk_size];

	 * <p>
	 * 构造函数
	 * </p>
	 * @param size
	 *            文件块的大小
	 * @return
	public ChunkInfo(int id, int size, String filename, int filenum,
			int chunknum, String hash, byte bytes[]) {
		chunk_id = id;
		chunk_size = size;
		chunk_filename = filename;
		chunk_filenum = filenum;
		chunk_num = chunknum;
		hashValue = hash;
		blockBytes = bytes;

	public void readFields(DataInput arg0) throws IOException {
		// TODO Auto-generated method stub
		// 从输入流中读取类的信息,并把其存放到类中。
		try {
			chunk_id = arg0.readInt(); // 子块的id
			chunk_size = arg0.readInt(); // 子块的大小
			chunk_filenum = arg0.readInt(); // 该子块所属的文件数。
			chunk_num = arg0.readInt(); // 该子块在所有文件中出现的总次数。
			hashValue = arg0.readUTF(); // 块的hash值,通常是md5值。
			chunk_filename = arg0.readUTF(); // 子块的文件名
			// int length = arg0.readInt();
			// arg0.readFully(blockBytes,0,length); //存放子块的字节。
		} catch (EOFException e) {
			return; // 获得读入文件末尾的异常后,函数返回。

	public void write(DataOutput arg0) throws IOException {
		// TODO Auto-generated method stub
		// map阶段将类的信息输出到输出流中。
		// arg0.writeInt(blockBytes.length);
		// arg0.write(blockBytes);

	 * @Override public int compareTo(Object o) { // TODO Auto-generated method
	 * stub // 将这个自定义类型的相互比较设置为哈希值的比较,即若哈希值相同,则这两个对象就相等。 ChunkInfo test =
	 * (ChunkInfo)o; if(test.hashValue.equals(test.hashValue)) return 0; else
	 * return -1; }
	public String toString() {
		return this.chunk_id + " " + this.chunk_size + " "
				+ this.chunk_filename.toString() + " "
				+ this.hashValue.toString() + " " + this.chunk_num + " "
				+ this.chunk_filenum;




import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import SerialAlgorithm.RabinHashFunction;

public class CDC_RecordReader extends RecordReader<IntWritable, ChunkInfo> {

	public int chunkId;
	public FileSplit fileSplit;
	public int chunkSize = 8 * 1024; // 一个文件块的大小。
	public String filename; // 文件的名字。
	public FSDataInputStream fileIn; // 分布式文件系统的输入流。
	public Path filePath; // 分布式文件系统路径。
	public FileSystem fileSystem; // 分布式文件系统。
	public long start; // 文件的第一个字节位置。
	public long pos; // 文件访问位置。
	public long end; // 文件结束的位置。
	public byte buffer[]; // 文件内容缓存。
	public Configuration conf;
	public IntWritable key = new IntWritable(0);
	public byte[] tempbytes = new byte[2];
	public ChunkInfo value = new ChunkInfo(0, chunkSize, " ", 0, 0, " ",
	public int chunkMask; // 划分掩码
	private List<Long> list = new ArrayList<Long>(); // 存放文件块划分点的标记.
	private RabinHashFunction rabin = new RabinHashFunction(); // 用于计算rabin指纹.
	private long magicValue = 1111; // 随便设置的值.

	CDC_RecordReader() {


	public void close() throws IOException {
		// TODO Auto-generated method stub
		if (fileIn != null) {

	public IntWritable getCurrentKey() throws IOException, InterruptedException {
		// TODO Auto-generated method stub
		return key;

	public ChunkInfo getCurrentValue() throws IOException, InterruptedException {
		// TODO Auto-generated method stub
		return value;

	public float getProgress() throws IOException, InterruptedException {
		// TODO Auto-generated method stub
		if (start == end) {
			return 0.0f;
		} else {
			return Math.min(1.0f, (pos - start) / (float) (end - start));

	public void initialize(InputSplit arg0, TaskAttemptContext arg1)
			throws IOException, InterruptedException {
		// TODO Auto-generated method stub
		conf = arg1.getConfiguration();
		this.fileSplit = (FileSplit) arg0;
		this.filePath = this.fileSplit.getPath();
		this.chunkId = 0;
		this.start = fileSplit.getStart();
		this.pos = this.start;

		try {
			this.fileSystem = filePath.getFileSystem(conf);
			this.filename = this.filePath.toString();
			this.fileIn = fileSystem.open(filePath);
			// 将文件内容写入out中,再从out中返回byte数组
			ByteArrayOutputStream out = new ByteArrayOutputStream();
			buffer = new byte[4096];
			int n=0;
			while(( n = fileIn.read(buffer)) != -1){
			buffer = out.toByteArray();
		} catch (IOException e) {
			// TODO Auto-generated catch block
		this.markBytesArray(buffer, 10, 128);


	public boolean nextKeyValue() throws IOException, InterruptedException {
		// TODO Auto-generated method stub
		int i = this.chunkId;
		this.chunkId++;// 自增,在传参之后。
		if((i + 1) >= list.size())
			return false;
		value.blockBytes=new byte[(int) (list.get(i+1)-list.get(i))];
		for(int j = 0; j < value.blockBytes.length; j++){
			value.blockBytes[j] = buffer[(int) (list.get(i) + j)];
		value.chunk_filename = filename;
		value.chunk_filenum = 1;
		value.chunk_num = 1;
		value.chunk_id = chunkId;
		return true;

	 * <p>
	 * 通过设置的exp_chunk_size的值,来计算该比对多少位rabin指纹值.
	 * </p>
	 * @return 比对rabin指纹值的位数
	private int calculateMask(int exp_chunk_size) {
		int a = 0;
		a = (int) (Math.log(exp_chunk_size) / Math.log(2));
		a = (int) Math.pow(2, a) - 1;
		return a;

	 * <p>
	 * 通过rabin指纹,为文件划分子块,在bytes数组中做上划分位置的标记, 并将它们保存在list链表中。
	 * </p>
	 * @param bytes
	 *            字节数组,用于存放读入文件的字节。
	 * @param step
	 *            窗口滑动步数。
	 * @param substring_size
	 * 			  窗口长度
	private void markBytesArray(byte bytes[], int step, int substring_size) {

		chunkMask = this.calculateMask(chunkSize);// 计算划分块数时需要的掩码.

		// 在此循环中,按步长substring_size遍历数组bytes,用rabin指纹对
		// 符合一定要求的位置做上标记,并把标记放入list链表中。
		list.add((long) 0);
		for (int i = 0; i < bytes.length; i += step) {
			byte test[] = null;// 读取指定步长的数组
			if (i + substring_size < bytes.length) {
				test = new byte[substring_size];
			} else
				test = new byte[bytes.length - i];

			// 将bytes数组的某一部分放入test数组中,用于计算指纹
			for (int j = 0; j < test.length; j++) {
				test[j] = bytes[i + j];
			long temp = rabin.hash(test);// 计算rabin指纹
			temp = temp & chunkMask;// 得到指纹的后chunkMask位

			// 将指纹与预先设定好的magicValue进行比对
			// 若指纹值等于预先设置好的magicValue或两个划分点之间的大小已经超过了预先设定的划分大小
			// 则标记划分点
			if (temp == magicValue) {
				list.add((long) (i + test.length));
			} else
		if (list.get(list.size() - 1) != bytes.length) {
			list.add((long) (bytes.length - 1));


import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class CDC_FileInputFormat extends
FileInputFormat<IntWritable, ChunkInfo>{

	public RecordReader<IntWritable, ChunkInfo> createRecordReader(
			InputSplit arg0, TaskAttemptContext arg1) throws IOException,
			InterruptedException {
		// TODO Auto-generated method stub
		return new CDC_RecordReader();



import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CDC_Hadoop {

	public static class CDCMapper extends
			Mapper<IntWritable, ChunkInfo, Text, ChunkInfo> {
		MD5Util MD5 = new MD5Util();

		public void map(IntWritable key, ChunkInfo value, Context context)
				throws IOException, InterruptedException {
			String hashValue = MD5Util.getMD5String(value.blockBytes);
			Text keyOfReduce = new Text();
			value.hashValue = hashValue;
			context.write(keyOfReduce, new ChunkInfo(value.chunk_id,
					value.chunk_size, value.chunk_filename,
					value.chunk_filenum, value.chunk_num, value.hashValue,


	public static class CDCReducer extends
			Reducer<Text, ChunkInfo, Text, IntWritable> {
		IntWritable temp = new IntWritable(0);// 测试用
		Text hashValue = new Text();
		int id = 1;

		public void reduce(Text key, Iterable<ChunkInfo> values, Context context)
				throws IOException, InterruptedException {
			int countChunkNum = 0;
			int countFileNum = 0;
			String filename = " ";
			int i = 0;
			ChunkInfo one = new ChunkInfo();
			// 遍历values。
			for (ChunkInfo chunk : values) {
				if(i == 0){
					filename = chunk.chunk_filename;
				if(chunk.chunk_filename == filename){

			one.chunk_filename = filename;
			one.chunk_filenum = countFileNum;
			one.chunk_num = countChunkNum;
			one.hashValue = key.toString();
			one.chunk_id = id;
			context.write(hashValue, temp);


	public static void main(String[] args) throws Exception {
		long startTime=System.currentTimeMillis();   //获取开始时间  
		Configuration conf = new Configuration();
		Job job = new Job(conf, "CDC");
//		Path in = new Path("hdfs://localhost:9000/user/justyoung/input");
//		Path in2 = new Path("hdfs://localhost:9000/user/justyoung/input2");
//		Path out = new Path("hdfs://localhost:9000/user/justyoung/CDCoutput");
		Path in = new Path("/home/justyoung/input");
		Path in2 = new Path("/home/justyoung/input2");
		Path out = new Path("/home/justyoung/CDCoutput");
		FileInputFormat.setInputPaths(job, in, in2);
		FileOutputFormat.setOutputPath(job, out);

		// job.setCombinerClass(FSPReducer.class);
		long endTime=System.currentTimeMillis(); //获取结束时间  
			System.out.println("程序运行时间: "+(endTime-startTime)+"ms");   
		System.exit(job.waitForCompletion(true) ? 0 : 1);





# Copy the same local file to two destinations, `input` and `input2`.
hadoop dfs -put ~/RemoteSSH.py input
hadoop dfs -put ~/RemoteSSH.py input2





import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

/**
 * Static helpers that compute MD5 digests of strings, byte arrays and files
 * and render them as 32-character lower-case hexadecimal strings.
 *
 * <p>
 * All methods share a single {@link MessageDigest} instance, so the class
 * must not be used from several threads at the same time.
 * </p>
 */
public class MD5Util {
	/**
	 * Characters used to render a byte as two hexadecimal digits.
	 */
	protected static char hexDigits[] = { '0', '1', '2', '3', '4', '5', '6',
			'7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };

	protected static MessageDigest messagedigest = null;
	static {
		try {
			messagedigest = MessageDigest.getInstance("MD5");
		} catch (NoSuchAlgorithmException nsaex) {
			System.err.println(MD5Util.class.getName()
					+ "初始化失败,MessageDigest不支持MD5Util。");
			nsaex.printStackTrace();
		}
	}

	/**
	 * Computes the MD5 of a string, encoded with the platform default
	 * charset.
	 *
	 * @param s
	 *            the string to hash
	 * @return the digest as a hex string
	 */
	public static String getMD5String(String s) {
		return getMD5String(s.getBytes());
	}

	/**
	 * Checks whether the MD5 of a string equals a known MD5 value.
	 *
	 * @param password
	 *            the string to check
	 * @param md5PwdStr
	 *            the expected digest as a lower-case hex string
	 * @return {@code true} if the digests are equal
	 */
	public static boolean checkPassword(String password, String md5PwdStr) {
		String s = getMD5String(password);
		return s.equals(md5PwdStr);
	}

	/**
	 * Computes the MD5 of a file by streaming it through a small buffer.
	 *
	 * @param file
	 *            the file to hash
	 * @return the digest as a hex string
	 * @throws IOException
	 *             if the file cannot be opened or read
	 */
	public static String getFileMD5String(File file) throws IOException {
		InputStream fis = new FileInputStream(file);
		try {
			// Drop anything left in the shared digest by an earlier call that
			// failed half-way through.
			messagedigest.reset();
			byte[] buffer = new byte[1024];
			int numRead = 0;
			while ((numRead = fis.read(buffer)) > 0) {
				messagedigest.update(buffer, 0, numRead);
			}
			return bufferToHex(messagedigest.digest());
		} finally {
			fis.close();
		}
	}

	/**
	 * Computes the MD5 of a file through a memory-mapped buffer.
	 *
	 * <p>
	 * Not recommended: the mapping created by {@code FileChannel.map} holds
	 * on to the file even after the channel is closed and there is no unmap
	 * operation, so the file may be impossible to delete afterwards.
	 * </p>
	 *
	 * @param file
	 *            the file to hash
	 * @return the digest as a hex string
	 * @throws IOException
	 *             if the file cannot be opened or mapped
	 * @deprecated use {@link #getFileMD5String(File)} instead
	 */
	@Deprecated
	public static String getFileMD5String_old(File file) throws IOException {
		FileInputStream in = new FileInputStream(file);
		try {
			FileChannel ch = in.getChannel();
			MappedByteBuffer byteBuffer = ch.map(FileChannel.MapMode.READ_ONLY, 0,
					file.length());
			messagedigest.reset();
			messagedigest.update(byteBuffer);
			return bufferToHex(messagedigest.digest());
		} finally {
			in.close();
		}
	}

	/**
	 * Computes the MD5 of a byte array.
	 *
	 * @param bytes
	 *            the data to hash
	 * @return the digest as a hex string
	 */
	public static String getMD5String(byte[] bytes) {
		messagedigest.reset();
		messagedigest.update(bytes);
		return bufferToHex(messagedigest.digest());
	}

	/** Renders a whole byte array as hex. */
	private static String bufferToHex(byte bytes[]) {
		return bufferToHex(bytes, 0, bytes.length);
	}

	/** Renders {@code n} bytes starting at index {@code m} as hex. */
	private static String bufferToHex(byte bytes[], int m, int n) {
		StringBuilder hex = new StringBuilder(2 * n);
		int k = m + n;
		for (int l = m; l < k; l++) {
			appendHexPair(bytes[l], hex);
		}
		return hex.toString();
	}

	/** Appends the two hex digits of one byte, high nibble first. */
	private static void appendHexPair(byte bt, StringBuilder hex) {
		// The mask clears the sign-extended bits before the shift, so >> and
		// >>> give the same result here.
		char c0 = hexDigits[(bt & 0xf0) >> 4]; // high 4 bits
		char c1 = hexDigits[bt & 0xf]; // low 4 bits
		hex.append(c0);
		hex.append(c1);
	}
}


import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.net.URL;

/**
 * We compute the checksum using Broder's implementation of
 * Rabin's fingerprinting algorithm. Fingerprints offer
 * provably strong probabilistic guarantees that two
 * different strings will not have the same fingerprint.
 * Other checksum algorithms, such as MD5 and SHA, do not
 * offer such provable guarantees, and are also more
 * expensive to compute than Rabin fingerprints.
 * <p>
 * A disadvantage is that these faster functions are
 * efficiently invertible (that is, one can easily build a
 * URL that hashes to a particular location), a fact that
 * might be used by malicious users for nefarious purposes.
 * <p>
 * Using Rabin's fingerprinting function, the probability of
 * collision of two strings s1 and s2 can be bounded (in an adversarial
 * model for s1 and s2) by max(|s1|,|s2|)/2**(l-1), where |s1| is the
 * length of the string s1 in bits.
 * <p>
 * The advantage of choosing Rabin fingerprints (which are based on random
 * irreducible polynomials) rather than some arbitrary hash function is that
 * their probability of collision is well understood. Furthermore, Rabin
 * fingerprints can be computed very efficiently in software and we can
 * take advantage of their algebraic properties when we compute the
 * fingerprints of "sliding windows".
 * <p>
 * References:
 * <p>
 * M. O. Rabin,
 * Fingerprinting by random polynomials.
 * Center for Research in Computing Technology,
 * Harvard University Report TR-15-81,
 * 1981.
 * <p>
 * A. Z. Broder,
 * Some applications of Rabin's fingerprinting method.
 * In R. Capocelli, A. De Santis and U. Vaccaro, editors,
 * Sequences II: Methods in Communications, Security, and Computer Science,
 * pages 143-152.
 * Springer-Verlag,
 * 1993.
 */
public final class RabinHashFunction implements Serializable {

        /*
         * Implementation notes.
         *
         * Every hash method folds its input into a 64-bit state w, 64 input
         * bits at a time: the previous state is run through eight 256-entry
         * lookup tables (one per state byte) and the result is XOR-ed with
         * the next 64 input bits. Because entry 0 of every table is 0, the
         * first 64 input bits end up in the state unchanged.
         *
         * All shifts that place input bits above bit 31 are done on long
         * values and bytes are masked with 0xFF first. In Java an int shift
         * uses only the low five bits of the distance, so (A[s] << 56) would
         * really shift by 24, and an unmasked byte would be sign-extended
         * over all higher bits.
         */

        private final static int P_DEGREE = 64;
        private final static int READ_BUFFER_SIZE = 2048;
        // Declared long: the int expression 1 << 63 would shift by 63 % 32 = 31.
        private final static long X_P_DEGREE = 1L << (P_DEGREE - 1);

        // Scratch buffer for hash(InputStream); access is synchronized on it.
        private final byte[] buffer;

        //private long POLY = Long.decode("0x0060034000F0D50A").longValue();
        // The polynomial constant with bit 63 set; 1L keeps the shift in long
        // arithmetic.
        private long POLY = Long.decode("0x004AE1202C306041").longValue() | (1L << 63);

        private final long[] table32, table40, table48, table54;
        private final long[] table62, table70, table78, table84;

        /**
         *  Builds the eight lookup tables from the polynomial.
         */
        public RabinHashFunction() {
                // New arrays are zero-filled, so no explicit clearing is needed.
                table32 = new long[256];
                table40 = new long[256];
                table48 = new long[256];
                table54 = new long[256];
                table62 = new long[256];
                table70 = new long[256];
                table78 = new long[256];
                table84 = new long[256];
                buffer = new byte[READ_BUFFER_SIZE];

                // mods[i] is mods[i - 1] shifted left by one bit, XOR-ed with
                // POLY whenever the bit shifted out was set.
                long[] mods = new long[P_DEGREE];
                mods[0] = POLY;
                for (int i = 1; i < P_DEGREE; i++) {
                        mods[i] = mods[i - 1] << 1;
                        if ((mods[i - 1] & X_P_DEGREE) != 0) {
                                mods[i] = mods[i] ^ POLY;
                        }
                }

                // Entry i of a table is the XOR of the mods entries selected by
                // the set bits of i, offset by the byte position of the table.
                for (int i = 0; i < 256; i++) {
                        long c = i;
                        for (int j = 0; j < 8 && c != 0; j++) {
                                if ((c & 1) != 0) {
                                        table32[i] = table32[i] ^ mods[j];
                                        table40[i] = table40[i] ^ mods[j + 8];
                                        table48[i] = table48[i] ^ mods[j + 16];
                                        table54[i] = table54[i] ^ mods[j + 24];
                                        table62[i] = table62[i] ^ mods[j + 32];
                                        table70[i] = table70[i] ^ mods[j + 40];
                                        table78[i] = table78[i] ^ mods[j + 48];
                                        table84[i] = table84[i] ^ mods[j + 56];
                                }
                                c >>>= 1;
                        }
                }
        }

        /**
         *  Runs the current state through the eight lookup tables, one table
         *  per state byte, and XORs the results.
         */
        private long shift(long w) {
                return table32[(int) (w & 0xFF)]
                        ^ table40[(int) ((w >>> 8) & 0xFF)]
                        ^ table48[(int) ((w >>> 16) & 0xFF)]
                        ^ table54[(int) ((w >>> 24) & 0xFF)]
                        ^ table62[(int) ((w >>> 32) & 0xFF)]
                        ^ table70[(int) ((w >>> 40) & 0xFF)]
                        ^ table78[(int) ((w >>> 48) & 0xFF)]
                        ^ table84[(int) ((w >>> 56) & 0xFF)];
        }

        /**
         *  Return the Rabin hash value of an array of bytes.
         *
         *@param  A  the array of bytes
         *@return    the hash value
         */
        public long hash(byte[] A) {
                return hash(A, 0, A.length, 0);
        }

        /**
         *  Hashes a range of a byte array, continuing from a previous state.
         *
         *@param  A       the array holding the bytes
         *@param  offset  index of the first byte to hash
         *@param  length  number of bytes to hash
         *@param  ws      state to continue from, 0 for a fresh hash
         *@return         the state after the range has been folded in
         */
        private long hash(byte[] A, int offset, int length, long ws) {
                long w = ws;
                // The length % 8 leading bytes are shifted in one by one so
                // that the main loop can consume whole groups of eight.
                int start = length % 8;
                for (int s = offset; s < offset + start; s++) {
                        w = (w << 8) ^ (A[s] & 0xFF);
                }
                for (int s = offset + start; s < length + offset; s += 8) {
                        w = shift(w)
                                ^ ((long) (A[s] & 0xFF) << 56)
                                ^ ((long) (A[s + 1] & 0xFF) << 48)
                                ^ ((long) (A[s + 2] & 0xFF) << 40)
                                ^ ((long) (A[s + 3] & 0xFF) << 32)
                                ^ ((long) (A[s + 4] & 0xFF) << 24)
                                ^ ((long) (A[s + 5] & 0xFF) << 16)
                                ^ ((long) (A[s + 6] & 0xFF) << 8)
                                ^ (long) (A[s + 7] & 0xFF);
                }
                return w;
        }

        /**
         *  Return the Rabin hash value of an array of chars.
         *
         *@param  A  the array of chars
         *@return    the hash value
         */
        public long hash(char[] A) {
                long w = 0;
                int start = A.length % 4;
                for (int s = 0; s < start; s++) {
                        w = (w << 16) ^ (A[s] & 0xFFFF);
                }
                for (int s = start; s < A.length; s += 4) {
                        w = shift(w)
                                ^ ((long) (A[s] & 0xFFFF) << 48)
                                ^ ((long) (A[s + 1] & 0xFFFF) << 32)
                                ^ ((long) (A[s + 2] & 0xFFFF) << 16)
                                ^ ((long) (A[s + 3] & 0xFFFF));
                }
                return w;
        }

        /**
         *  Computes the Rabin hash value of the contents of a file.
         *
         *@param  f                       the file to be hashed
         *@return                         the hash value of the file
         *@throws  FileNotFoundException  if the file cannot be found
         *@throws  IOException            if an error occurs while reading the file
         */
        public long hash(File f) throws FileNotFoundException, IOException {
                FileInputStream fis = new FileInputStream(f);
                try {
                        return hash(fis);
                } finally {
                        fis.close();
                }
        }

        /**
         *  Computes the Rabin hash value of the data from an <code>InputStream</code>.
         *  The stream is read until it reports no more data; it is not closed.
         *
         *@param  is            the InputStream to hash
         *@return               the hash value of the data from the InputStream
         *@throws  IOException  if an error occurs while reading from the
         *      InputStream
         */
        public long hash(InputStream is) throws IOException {
                long hashValue = 0;
                int bytesRead;
                synchronized (buffer) {
                        while ((bytesRead = is.read(buffer)) > 0) {
                                hashValue = hash(buffer, 0, bytesRead, hashValue);
                        }
                }
                return hashValue;
        }

        /**
         *  Returns the Rabin hash value of an array of integers.
         *
         *@param  A  array of integers
         *@return    the hash value
         */
        public long hash(int[] A) {
                long w = 0;
                int start = 0;
                // The long mask keeps a negative int from being sign-extended
                // into the upper half of the state.
                if (A.length % 2 == 1) {
                        w = A[0] & 0xFFFFFFFFL;
                        start = 1;
                }
                for (int s = start; s < A.length; s += 2) {
                        w = shift(w)
                                ^ ((A[s] & 0xFFFFFFFFL) << 32)
                                ^ (A[s + 1] & 0xFFFFFFFFL);
                }
                return w;
        }

        /**
         *  Returns the Rabin hash value of an array of longs.
         *
         *@param  A  array of longs
         *@return    the hash value
         */
        public long hash(long[] A) {
                long w = 0;
                for (int s = 0; s < A.length; s++) {
                        w = shift(w) ^ (A[s]);
                }
                return w;
        }

        /**
         *  Hashes the serialized form of an object.
         *
         *@param  obj              the object to hash; must implement Serializable
         *@return                  the hash value
         *@exception  IOException  if serialization fails
         */
        public long hash(Object obj) throws IOException {
                return hash((Serializable) obj);
        }

        /**
         *  Returns the Rabin hash value of a serializable object.
         *
         *@param  obj           the object to be hashed
         *@return               the hash value
         *@throws  IOException  if serialization fails
         */
        public long hash(Serializable obj) throws IOException {
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                ObjectOutputStream oos = new ObjectOutputStream(baos);
                try {
                        oos.writeObject(obj);
                        // Flush before the bytes are taken so that nothing is
                        // left in the object stream's internal buffer.
                        oos.flush();
                } finally {
                        oos.close();
                }
                return hash(baos.toByteArray());
        }

        /**
         *  Computes the Rabin hash value of a String.
         *
         *@param  s  the string to be hashed
         *@return    the hash value
         */
        public long hash(String s) {
                return hash(s.toCharArray());
        }

        /**
         *  Computes the Rabin hash value of the contents of a file, specified by
         *  URL.
         *
         *@param  url           the URL of the file to be hashed
         *@return               the hash value of the file
         *@throws  IOException  if an error occurs while reading from the URL
         */
        public long hash(URL url) throws IOException {
                InputStream is = url.openStream();
                try {
                        return hash(is);
                } finally {
                        is.close();
                }
        }
}


