Google 翻译爬虫 Java

package com.fly.design.translation.google;


import com.fly.design.translation.Language;
import com.fly.design.translation.Translator;
import org.junit.Test;

import java.io.IOException;

import static org.junit.Assert.*;

/**
 * Exercises {@link GoogleTranslator} through the {@link Translator} interface with a handful of
 * fixed Chinese/English words. Every check calls {@code translate} on a real
 * {@code GoogleTranslator}, so the expected strings are whatever the remote service answered when
 * the test was written.
 *
 * @author weijun.zou
 * Create on 2018/1/18
 */
public class GoogleTranslatorTest {

    /**
     * Translates four words, one after another, and compares each result with its expected text.
     * The first failing comparison aborts the remaining calls.
     */
    @Test
    public void translate() throws IOException, InterruptedException {
        final Translator google = new GoogleTranslator();

        // Chinese -> English
        String dollarsInEnglish = google.translate("美元", Language.CHINESE, Language.ENGLISH);
        assertEquals("Dollars", dollarsInEnglish);

        String peacefulInEnglish = google.translate("平安", Language.CHINESE, Language.ENGLISH);
        assertEquals("Peaceful", peacefulInEnglish);

        // English -> Chinese
        String dollarsInChinese = google.translate("Dollars", Language.ENGLISH, Language.CHINESE);
        assertEquals("美元", dollarsInChinese);

        String articleInChinese = google.translate("a", Language.ENGLISH, Language.CHINESE);
        assertEquals("一个", articleInChinese);
    }
}
package com.fly.design.translation;

/**
 * Languages that can be named as the source or the target of a translation.
 *
 * @author weijun.zou
 * Create on 2018/1/17
 */
public enum Language {
    /** Chinese. */
    CHINESE,
    /** English. */
    ENGLISH
}
package com.fly.design.translation;

import java.io.IOException;

/**
 * Translates a piece of text from one {@link Language} into another.
 *
 * @author weijun.zou
 * Create on 2018/1/17
 */
public interface Translator {

    /**
     * Translates {@code value} from the {@code input} language into the {@code output} language.
     *
     * @param value  the text to translate
     * @param input  the language {@code value} is written in
     * @param output the language to translate into
     * @return the translated text
     * @throws IOException          declared for implementations that perform I/O
     * @throws InterruptedException declared for implementations that may block and be interrupted
     */
    String translate(
            String value,
            Language input,
            Language output
    ) throws IOException, InterruptedException;
}
package com.fly.design.translation.google;

import com.fly.design.translation.Language;
import com.fly.design.translation.Translator;

import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;

import static com.fly.design.translation.Language.*;

/**
 * {@link Translator} that sends the text to the Google Translate web endpoint ({@code PATH}) with
 * an HTTP GET and returns the first string literal found in the response body.
 *
 * <p>Each request carries a {@code tk} token derived from the UTF-8 bytes of the text; see
 * {@link #getTk(String)}.
 *
 * @author weijun.zou
 * Create on 2018/1/17
 */
public class GoogleTranslator implements Translator {

    private static final Logger log = LoggerFactory.getLogger(GoogleTranslator.class);
    private static final String PATH = "https://translate.google.cn/translate_a/single";

    /** Charset used to decode the body when the response declares none; the request asks for "oe=UTF-8". */
    private static final String RESPONSE_CHARSET = "UTF-8";

    /** Keeps the low 32 bits of a {@code long}; the token arithmetic works on unsigned 32-bit values. */
    private static final long UINT32_MASK = 4294967295L;

    /** Mixing program applied after each input byte is added to the token. */
    private static final String BYTE_KEY = "+-a^+6";
    /** Mixing program applied once after all bytes were consumed. */
    private static final String FINAL_KEY = "+-3^+b+-f";
    // NOTE(review): the two seeds below presumably are a snapshot of the "TKK" value served by the
    // Google Translate web page - confirm they are still accepted by the endpoint.
    private static final long TOKEN_SEED = 406644L;
    private static final long TOKEN_XOR = 3293161072L;
    private static final long TOKEN_MODULUS = 1_000_000L;

    /** Query parameters that are identical for every request. */
    private static final NameValuePair[] ARG_LIST = {
            new BasicNameValuePair("client", "t"),
            new BasicNameValuePair("hl", toArgs(Language.CHINESE)),
            new BasicNameValuePair("dt", "at"),
            new BasicNameValuePair("dt", "bd"),
            new BasicNameValuePair("dt", "ex"),
            new BasicNameValuePair("dt", "ld"),
            new BasicNameValuePair("dt", "md"),
            new BasicNameValuePair("dt", "qca"),
            new BasicNameValuePair("dt", "rw"),
            new BasicNameValuePair("dt", "rm"),
            new BasicNameValuePair("dt", "ss"),
            new BasicNameValuePair("dt", "t"),
            new BasicNameValuePair("ie", "UTF-8"),
            new BasicNameValuePair("oe", "UTF-8")
    };

    private final HttpClient client = HttpClients.createDefault();

    /**
     * Translates {@code value} by querying the remote endpoint.
     *
     * @param value  the text to translate; surrounding whitespace is trimmed before sending
     * @param input  the language of {@code value}
     * @param output the language to translate into
     * @return the first string literal of the response body, or {@code ""} when the trimmed input
     *         is empty (no request is sent in that case)
     * @throws IOException if the request fails, the server answers with a non-2xx status, the
     *                     response has no body, or the body contains no complete string literal
     * @throws InterruptedException never thrown here; declared to match {@link Translator}
     */
    @Override
    public String translate(String value, Language input, Language output)
            throws IOException, InterruptedException {
        String text = value.trim();
        if (text.isEmpty()) {
            return "";
        }
        HttpGet httpGet = new HttpGet(createURI(text, input, output));
        return client.execute(httpGet, response -> {
            int status = response.getStatusLine().getStatusCode();
            if (status < 200 || status >= 300) {
                throw new IOException("google translate answered with HTTP status " + status);
            }
            if (response.getEntity() == null) {
                throw new IOException("google translate response has no body");
            }
            // Without an explicit default the library decodes charset-less bodies as ISO-8859-1,
            // which garbles Chinese text.
            String data = EntityUtils.toString(response.getEntity(), RESPONSE_CHARSET);
            log.info(data);
            return firstStringLiteral(data);
        });
    }

    /**
     * Returns the content of the first double-quoted string literal in {@code data}, resolving
     * backslash escapes so that an escaped quote does not end the literal early.
     *
     * @throws IOException if {@code data} holds no complete, well-formed string literal
     */
    private static String firstStringLiteral(String data) throws IOException {
        int open = data.indexOf('"');
        if (open >= 0) {
            StringBuilder text = new StringBuilder();
            for (int i = open + 1; i < data.length(); i++) {
                char c = data.charAt(i);
                if (c == '"') {
                    return text.toString();
                }
                if (c != '\\') {
                    text.append(c);
                    continue;
                }
                if (++i >= data.length()) {
                    break;
                }
                char escaped = data.charAt(i);
                switch (escaped) {
                    case 'b':
                        text.append('\b');
                        break;
                    case 'f':
                        text.append('\f');
                        break;
                    case 'n':
                        text.append('\n');
                        break;
                    case 'r':
                        text.append('\r');
                        break;
                    case 't':
                        text.append('\t');
                        break;
                    case 'u':
                        if (i + 5 > data.length()) {
                            throw new IOException("Truncated unicode escape in google translate response: " + data);
                        }
                        try {
                            text.append((char) Integer.parseInt(data.substring(i + 1, i + 5), 16));
                        } catch (NumberFormatException e) {
                            throw new IOException("Malformed unicode escape in google translate response: " + data, e);
                        }
                        i += 4;
                        break;
                    default:
                        // Covers \" \\ and \/ : the escaped character stands for itself.
                        text.append(escaped);
                        break;
                }
            }
        }
        throw new IOException("No complete string literal in google translate response: " + data);
    }

    /**
     * Builds the request URI: the fixed {@code ARG_LIST} parameters plus source language,
     * target language, token and the text itself.
     *
     * @throws RuntimeException wrapping the {@link URISyntaxException} if the URI cannot be built
     */
    private static URI createURI(String value, Language input, Language output) {
        try {
            // Parse PATH as a complete URI; handing a full URL to setPath() only works with some
            // HttpClient versions.
            return new URIBuilder(PATH)
                    .setParameters(ARG_LIST)
                    .addParameters(List.of(
                            new BasicNameValuePair("sl", toArgs(input)),
                            new BasicNameValuePair("tl", toArgs(output)),
                            new BasicNameValuePair("tk", getTk(value)),
                            new BasicNameValuePair("q", value)
                    )).build();
        } catch (URISyntaxException e) {
            log.error("构建google翻译url出错", e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Computes the {@code tk} request parameter for {@code values}.
     *
     * <p>Starting from {@code TOKEN_SEED}, every UTF-8 byte of the text is added to the token and
     * mixed with {@code BYTE_KEY}; the result is mixed once with {@code FINAL_KEY}, XOR-ed with
     * {@code TOKEN_XOR} and reduced modulo {@code TOKEN_MODULUS}.
     *
     * @return {@code "<token>.<token ^ TOKEN_SEED>"}
     */
    private static String getTk(String values) {
        long token = TOKEN_SEED;
        for (long unit : toNums(values.toCharArray())) {
            token = encode(token + unit, BYTE_KEY);
        }
        // encode() always yields an unsigned 32-bit value and TOKEN_XOR fits in 32 bits, so the
        // token cannot be negative here and needs no sign correction.
        token = encode(token, FINAL_KEY) ^ TOKEN_XOR;
        token %= TOKEN_MODULUS;
        return token + "." + (token ^ TOKEN_SEED);
    }

    /**
     * Converts UTF-16 chars into their UTF-8 byte values (each 0..255, stored as {@code Long}).
     *
     * <p>A high surrogate followed by a low surrogate becomes one four-byte sequence. An unpaired
     * surrogate is encoded as a three-byte sequence of its own value.
     */
    private static List<Long> toNums(char[] values) {
        List<Long> valueList = new ArrayList<>(values.length);
        for (int i = 0; i < values.length; i++) {
            long value = values[i];
            if (value < 0x80) {
                // One byte: plain ASCII.
                valueList.add(value);
                continue;
            }
            if (value < 0x800) {
                // Two bytes: lead byte here, continuation byte appended below.
                valueList.add(value >> 6 | 0xC0);
            } else {
                if (Character.isHighSurrogate(values[i])
                        && i + 1 < values.length
                        && Character.isLowSurrogate(values[i + 1])) {
                    // Four bytes: merge the surrogate pair into one code point first.
                    value = 0x10000 + ((value & 0x3FF) << 10) + (values[++i] & 0x3FF);
                    valueList.add(value >> 18 | 0xF0);
                    valueList.add(value >> 12 & 0x3F | 0x80);
                } else {
                    // Three bytes.
                    valueList.add(value >> 12 | 0xE0);
                }
                valueList.add(value >> 6 & 0x3F | 0x80);
            }
            // Last continuation byte, shared by all multi-byte forms.
            valueList.add(value & 0x3F | 0x80);
        }
        return valueList;
    }

    /**
     * Mixes {@code value} according to {@code key}, working on unsigned 32-bit values.
     *
     * <p>{@code key} is read in groups of three characters:
     * <ul>
     *   <li>char 0: {@code '+'} adds the shifted value, anything else XORs it;</li>
     *   <li>char 1: {@code '+'} shifts right (unsigned), anything else shifts left;</li>
     *   <li>char 2: the shift distance, a decimal digit or a letter from {@code 'a'} (= 10) up.</li>
     * </ul>
     *
     * @return the mixed value, always within {@code 0..4294967295}
     */
    private static long encode(long value, String key) {
        long mixed = value & UINT32_MASK;
        for (int i = 0; i < key.length() - 2; i += 3) {
            char operation = key.charAt(i);
            char direction = key.charAt(i + 1);
            char amount = key.charAt(i + 2);
            int shift = amount >= 'a' ? amount - 'a' + 10 : amount - '0';
            long shifted = direction == '+' ? mixed >>> shift : mixed << shift;
            mixed = (operation == '+' ? mixed + shifted : mixed ^ shifted) & UINT32_MASK;
        }
        return mixed;
    }

    /**
     * Maps a {@link Language} to the language code used in the request parameters.
     *
     * @return {@code "zh-CN"} for {@code CHINESE}, {@code "en"} for {@code ENGLISH}, otherwise
     *         the empty string
     */
    private static String toArgs(Language language) {
        if (language == CHINESE) {
            return "zh-CN";
        }
        if (language == ENGLISH) {
            return "en";
        }
        return "";
    }
}

  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值