方法一:
刚开始用selenium来爬
/**
 * Pushes a text through the Youdao translation page twice with a browser.
 *
 * <p>The text is typed into the page's input box and the rendered output is
 * read back; that output is cleaned, typed into the (cleared) input box
 * again, and the second cleaned output is returned. Which language direction
 * the page picks for each pass is decided by the page itself.
 *
 * @param srcTxt text to type into the translator's input box
 * @return the cleaned output of the second pass, or {@code ""} when the
 *         thread is interrupted or the page produces no output in time
 */
public static String changeArticle(String srcTxt) {
    WebDriver driver = SeleniumUtil.getNoHeadDriver();
    try {
        // Navigation is inside the try so the browser is released even if it fails.
        driver.get("http://fanyi.youdao.com/");
        driver.findElement(By.id("inputOriginal")).sendKeys(srcTxt);
        String firstPass = awaitTranslation(driver);
        if (firstPass.isEmpty()) {
            return "";
        }
        firstPass = stripTranslationMarkup(firstPass);

        // Clear the input box, then feed the first result back in.
        SeleniumUtil.click(driver, driver.findElement(By.id("inputDelete")));
        driver.findElement(By.id("inputOriginal")).sendKeys(firstPass);
        // Short pause before the first read of the second pass.
        Thread.sleep(100);
        String secondPass = awaitTranslation(driver);
        if (secondPass.isEmpty()) {
            return "";
        }
        return stripTranslationMarkup(secondPass);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can still observe the interruption.
        Thread.currentThread().interrupt();
        e.printStackTrace();
        return "";
    } finally {
        // quit() ends the whole session; a separate close() beforehand is unnecessary.
        driver.quit();
    }
}

/**
 * Polls the output element every 100 ms until it has content, giving up
 * after roughly 30 seconds instead of spinning forever.
 *
 * @return the element's inner HTML, or {@code ""} if it stayed empty
 */
private static String awaitTranslation(WebDriver driver) throws InterruptedException {
    final int maxPolls = 300; // 300 polls * 100 ms is about 30 s
    String html = driver.findElement(By.id("transTarget")).getAttribute("innerHTML");
    for (int polls = 0; (html == null || "".equals(html)) && polls < maxPolls; polls++) {
        Thread.sleep(100);
        html = driver.findElement(By.id("transTarget")).getAttribute("innerHTML");
    }
    return html == null ? "" : html;
}

/**
 * Applies the same two clean-up helpers to the output HTML on both passes.
 * NOTE(review): presumably changeP normalizes paragraph tags and kill removes
 * everything between "<" and ">" - confirm against those helpers.
 */
private static String stripTranslationMarkup(String html) {
    String cleaned = HtmlUtil.changeP(html);
    return StringUtil.kill(cleaned, "<", ">");
}
效率太低,放弃了
方法二:
参考了网上的做法:注释掉salt、sign两个参数,
并把请求地址从fanyi.youdao.com/translate_o改成了fanyi.youdao.com/translate。
英文转中文没问题;但中文转英文时,返回的是乱码。我一度以为是自己的post方法导致的,折腾了一整夜也没解决,最后放弃了。
/**
 * Posts a translation request to the unsigned Youdao endpoint
 * ({@code /translate}); the salt and sign fields are deliberately not sent.
 *
 * <p>Per the author's note, this variant only works for English to Chinese.
 *
 * @param params passed through to {@code SpiderUtil.post} as its second argument
 * @param i      text to translate
 * @param from   source language code, e.g. en, zh-CHS, vi
 * @param to     target language code
 * @return whatever {@code SpiderUtil.post} returns for the request
 * @throws Exception propagated from {@code SpiderUtil.post}
 */
public static String translate_en2zh(Map<String, String> params,String i,String from,String to) throws Exception {
    final String endpoint = "https://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule";

    Map<String, String> formFields = new HashMap<String, String>();
    // Caller-supplied values.
    formFields.put("i", i);
    formFields.put("from", from);
    formFields.put("to", to);
    // Fixed values sent with every request.
    formFields.put("smartresult", "dict");
    formFields.put("client", "fanyideskweb");
    formFields.put("lts", "1637412893810");
    formFields.put("bv", "b0ff5d17f404993192085bf8b1e93587");
    formFields.put("doctype", "json");
    formFields.put("version", "2.1");
    formFields.put("keyfrom", "fanyi.web");
    formFields.put("action", "FY_BY_REALTlME");

    return SpiderUtil.post(endpoint, params, formFields);
}
方法三:
破解sign的奥秘
网上有现成的方法,但是照着用只返回了{"errorCode":50}
后来发现是生成sign用的密钥已经变了。于是自己写了个getCode()方法,从网站的js里自动获取当前密钥,总算没问题了。
后来我想把越南语转成英文,返回了{"errorCode":40}。再看了一下网站,发现只有中文转其他语言,或者其他语言转中文,没有其他语言之间互转的操作。应该是网站不支持这种转换,只好作罢。
/**
 * Posts a signed translation request to the Youdao {@code /translate_o} endpoint.
 *
 * <p>The sign is the MD5 hex of {@code "fanyideskweb" + i + salt + key}, where
 * the key comes from {@code getCode()} and the salt is the current millisecond
 * timestamp with a random number from 1 to 10 appended.
 *
 * @param params header map; it is cleared and refilled with Cookie, Referer
 *               and User-Agent before being handed to {@code SpiderUtil.post}
 * @param i      text to translate
 * @param from   source language code, e.g. en, zh-CHS, vi
 * @param to     target language code
 * @return whatever {@code SpiderUtil.post} returns for the request
 * @throws Exception propagated from {@code getCode()} or {@code SpiderUtil.post}
 */
public static String translate(Map<String, String> params,String i,String from,String to) throws Exception {
    // Any entries the caller put in the map are discarded.
    params.clear();
    params.put("Cookie","OUTFOX_SEARCH_USER_ID=1799185238@10.169.0.83;");
    params.put("Referer","http://fanyi.youdao.com/");
    params.put("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36");

    Map<String, String> formFields = new HashMap<String, String>();
    String browserHash = DigestUtils.md5Hex("Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36");
    String timestamp = String.valueOf(System.currentTimeMillis());
    // String concatenation: the random number is appended to the timestamp text.
    String salt = timestamp + (long) (Math.random() * 10 + 1);
    String secretKey = getCode();
    String signature = DigestUtils.md5Hex("fanyideskweb" + i + salt + secretKey);

    formFields.put("i", i);
    formFields.put("from", from);
    formFields.put("to", to);
    formFields.put("smartresult", "dict");
    formFields.put("client", "fanyideskweb");
    formFields.put("salt", salt);
    formFields.put("sign", signature);
    formFields.put("lts", timestamp);
    formFields.put("bv", browserHash);
    formFields.put("doctype", "json");
    formFields.put("version", "2.1");
    formFields.put("keyfrom", "fanyi.web");
    formFields.put("action", "FY_BY_REALTlME");
    formFields.put("typoResult", "true");

    final String endpoint = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule";
    return SpiderUtil.post(endpoint, params, formFields);
}
/**
 * Fetches the current signing key from the Youdao site.
 *
 * <p>Loads the home page, picks the first script URL ending in
 * {@code fanyi.min.js}, downloads it to a temporary file, and extracts the
 * string literal that follows {@code sign:n.md5("fanyideskweb"+e+i+"} in it.
 *
 * @return the extracted key
 * @throws IOException if the script URL or the sign expression cannot be
 *                     found, or if a helper fails with an I/O error
 */
private static String getCode() throws IOException {
    String scriptUrl = "";
    String page = SpiderUtil.getPageContent("http://fanyi.youdao.com/");
    for (String src : Hp.find(page, "script[attr=src]")) {
        if (src.endsWith("fanyi.min.js")) {
            scriptUrl = src;
            break;
        }
    }
    if (scriptUrl.isEmpty()) {
        // Fail with a clear message instead of trying to download an empty URL.
        throw new IOException("fanyi.min.js script not found on http://fanyi.youdao.com/");
    }

    // Use the platform temp directory rather than a hard-coded Windows path.
    String tempPath = new java.io.File(
            System.getProperty("java.io.tmpdir"),
            "tem" + System.currentTimeMillis() + ".txt").getPath();
    String script;
    try {
        SpiderUtil.download(scriptUrl, tempPath);
        script = FileUtil.getFile(tempPath);
    } finally {
        // Remove the temp file even when download or read fails.
        if (new java.io.File(tempPath).exists()) {
            FileUtil.del(tempPath);
        }
    }

    java.util.List<String> matches =
            SearchUtil.getListWithFeature(script, Arrays.asList("sign:n.md5(\"", "\")"));
    if (matches == null || matches.isEmpty()) {
        throw new IOException("sign expression not found in " + scriptUrl);
    }
    // Strip the fixed prefix and the closing '")' so only the key remains.
    String feature = matches.get(0);
    feature = feature.replace("sign:n.md5(\"fanyideskweb\"+e+i+\"", "").replace("\")", "");
    return feature;
}