JAVA版PageRank查询代码

也就是以下两段代码:

/**
 * 
 */
package com.Experiment.ThemeIdentify.PageRank;

/**
 * cee.open.pagerank.JenkinsHash.java
 * 
 * This is a Bob Jenkins hashing algorithm implementation
 * 
 * These are functions for producing 32-bit hashes for hash table lookup.
 * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
 * are externally useful functions.  Routines to test the hash are included
 * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
 * the public domain.  It has no warranty.
 *
 * @version $Revision:$
 *          $Author:$
 */
public class JenkinsHash {

    // max value to limit it to 4 bytes
    private static final long MAX_VALUE = 0xFFFFFFFFL;

    // internal variables used in the various calculations
    long a;
    long b;
    long c;

    /**
     * Convert a byte into a long value without making it negative.
     * @param b
     * @return
     */
    private long byteToLong(byte b) {
        long val = b & 0x7F;
        if ((b & 0x80) != 0) {
            val += 128;
        }
        return val;
    }

    /**
     * Do addition and turn into 4 bytes.
     * @param val
     * @param add
     * @return
     */
    private long add(long val, long add) {
        return (val + add) & MAX_VALUE;
    }

    /**
     * Do subtraction and turn into 4 bytes.
     * @param val
     * @param subtract
     * @return
     */
    private long subtract(long val, long subtract) {
        return (val - subtract) & MAX_VALUE;
    }

    /**
     * Left shift val by shift bits and turn in 4 bytes.
     * @param val
     * @param xor
     * @return
     */
    private long xor(long val, long xor) {
        return (val ^ xor) & MAX_VALUE;
    }

    /**
     * Left shift val by shift bits.  Cut down to 4 bytes.
     * @param val
     * @param shift
     * @return
     */
    private long leftShift(long val, int shift) {
        return (val << shift) & MAX_VALUE;
    }

    /**
     * Convert 4 bytes from the buffer at offset into a long value.
     * @param bytes
     * @param offset
     * @return
     */
    private long fourByteToLong(byte[] bytes, int offset) {
        return (byteToLong(bytes[offset + 0])
                + (byteToLong(bytes[offset + 1]) << 8)
                + (byteToLong(bytes[offset + 2]) << 16)
                + (byteToLong(bytes[offset + 3]) << 24));
    }

    /**
     * Mix up the values in the hash function.
     */
    private void hashMix() {
        a = subtract(a, b);
        a = subtract(a, c);
        a = xor(a, c >> 13);
        b = subtract(b, c);
        b = subtract(b, a);
        b = xor(b, leftShift(a, 8));
        c = subtract(c, a);
        c = subtract(c, b);
        c = xor(c, (b >> 13));
        a = subtract(a, b);
        a = subtract(a, c);
        a = xor(a, (c >> 12));
        b = subtract(b, c);
        b = subtract(b, a);
        b = xor(b, leftShift(a, 16));
        c = subtract(c, a);
        c = subtract(c, b);
        c = xor(c, (b >> 5));
        a = subtract(a, b);
        a = subtract(a, c);
        a = xor(a, (c >> 3));
        b = subtract(b, c);
        b = subtract(b, a);
        b = xor(b, leftShift(a, 10));
        c = subtract(c, a);
        c = subtract(c, b);
        c = xor(c, (b >> 15));
    }

    /**
     * Hash a variable-length key into a 32-bit value.  Every bit of the
     * key affects every bit of the return value.  Every 1-bit and 2-bit
     * delta achieves avalanche.  The best hash table sizes are powers of 2.
     *
     * @param buffer       Byte array that we are hashing on.
     * @param initialValue Initial value of the hash if we are continuing from
     *                     a previous run.  0 if none.
     * @return Hash value for the buffer.
     */
    public long hash(byte[] buffer, long initialValue) {
        int len, pos;

        // set up the internal state
        // the golden ratio; an arbitrary value
        a = 0x09e3779b9L;
        // the golden ratio; an arbitrary value
        b = 0x09e3779b9L;
        // the previous hash value
        c = 0x0E6359A60L;

        // handle most of the key
        pos = 0;
        for (len = buffer.length; len >= 12; len -= 12) {
            a = add(a, fourByteToLong(buffer, pos));
            b = add(b, fourByteToLong(buffer, pos + 4));
            c = add(c, fourByteToLong(buffer, pos + 8));
            hashMix();
            pos += 12;
        }

        c += buffer.length;

        // all the case statements fall through to the next on purpose
        switch (len) {
            case 11:
                c = add(c, leftShift(byteToLong(buffer[pos + 10]), 24));
            case 10:
                c = add(c, leftShift(byteToLong(buffer[pos + 9]), 16));
            case 9:
                c = add(c, leftShift(byteToLong(buffer[pos + 8]), 8));
                // the first byte of c is reserved for the length
            case 8:
                b = add(b, leftShift(byteToLong(buffer[pos + 7]), 24));
            case 7:
                b = add(b, leftShift(byteToLong(buffer[pos + 6]), 16));
            case 6:
                b = add(b, leftShift(byteToLong(buffer[pos + 5]), 8));
            case 5:
                b = add(b, byteToLong(buffer[pos + 4]));
            case 4:
                a = add(a, leftShift(byteToLong(buffer[pos + 3]), 24));
            case 3:
                a = add(a, leftShift(byteToLong(buffer[pos + 2]), 16));
            case 2:
                a = add(a, leftShift(byteToLong(buffer[pos + 1]), 8));
            case 1:
                a = add(a, byteToLong(buffer[pos + 0]));
                // case 0: nothing left to add
        }
        hashMix();

        return c;
    }

    /**
     * See hash(byte[] buffer, long initialValue)
     *
     * @param buffer Byte array that we are hashing on.
     * @return Hash value for the buffer.
     */
    public long hash(byte[] buffer) {
        return hash(buffer, 0);
    }
}

另一段代码:

/**
 * 
 */
package com.Experiment.ThemeIdentify.PageRank;

import java.net.URL;
import java.net.URLConnection;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.math.NumberUtils;

import com.Experiment.ThemeIdentify.PageRank.JenkinsHash;

/**
 * cee.open.pagerank.GooglePageRank.java
 * 
 * PageRankService provides simple API to Google PageRank Technology
 * 
 * PageRankService queries google toolbar webservice and returns a google page
 * rank retrieved from one of the next datacenters on the list.
 * toolbarqueries.google.com
 *
 * @version $Revision:$
 *          $Author:$
 */
public class GooglePageRank {
	

	/**
	 * List of available google datacenter IPs and addresses
	 */
	static final public String GOOGLE_PR_DATACENTER_IP = "toolbarqueries.google.com";
	//static final public String GOOGLE_PR_DATACENTER_IP = "www.google.com";

	/**
	 * Must receive a domain in form of: "http://www.domain.com"
	 * 
	 * @param domain 
	 * @return PR rating (int) or -1 if unavailable or internal error happened.
	 */
	public int getPageRank(String domain) {
		
		JenkinsHash jHash = new JenkinsHash();
		long hash = jHash.hash(("info:" + domain).getBytes());

		String url = "http://" + GOOGLE_PR_DATACENTER_IP
				+ "/tbr?client=navclient-auto&hl=en&" + "ch=6" + hash
				+ "&ie=UTF-8&oe=UTF-8&features=Rank&q=info:" + domain;

		try {
			URLConnection conn = new URL(url).openConnection();
			String pageRankResponse = IOUtils.toString(conn.getInputStream());
			
			if (StringUtils.isNotBlank(pageRankResponse)) {
				return NumberUtils.toInt(pageRankResponse.split(":")[2].trim());
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
		
		return -1;
	}

	/**
	 * @param args
	 */
	public static void main(String[] args) {
		GooglePageRank prService = new GooglePageRank();
		System.out.println("PageRank: " + prService.getPageRank("http://www.baidu.com"));
	}
}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值