前言
最近手头的工作需要调用谷歌翻译,我在网上找了一些方法,无奈发现都会被封。最后我写了个爬虫,用浏览器模拟操作,这样速度较慢,但是不会被封。
方法一
直接调用 googletrans 包。该方法简单粗暴,但是会被封 IP,只适合少量数据。示例:
from googletrans import Translator
# Build a googletrans client, translate one sample string to English,
# and print the translated text.
translator = Translator()
translated = translator.translate('星期日', dest='en')
print(translated.text)
方法二
使用爬虫,用浏览器模拟操作(也试过不用浏览器,最后也会被封),该方法速度较慢,但是不会被封。示例:
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from retry import retry
# Chrome launch options for the Selenium-driven browser.
chrome_options = Options()
# Run headless so that no browser window is shown.
chrome_options.add_argument('--headless')
# Module-level browser instance; translate() below reuses it for every call.
browser = webdriver.Chrome(options=chrome_options)
@retry(tries=3, delay=1)
def translate(input, target):
    """Translate ``input`` into ``target`` by driving the Google Translate page.

    The text is typed into the page's source box using the module-level
    ``browser``, and the first text node of the rendered translation span
    is read back from the page source.

    Args:
        input: Text to translate (source language is auto-detected via
            ``sl=auto`` in the URL).
        target: Target language code placed in the ``tl`` URL parameter,
            e.g. ``'en'`` or ``'zh-CN'``.

    Returns:
        The first text node found inside the translation span.

    Raises:
        IndexError: If the translation span contains no text nodes.
        Other exceptions from Selenium (e.g. a wait timing out) propagate
        as well. The ``retry`` decorator is configured with ``tries=3`` and
        ``delay=1``, so failures are presumably re-attempted before the
        last exception reaches the caller.
    """
    result_xpath = '//span[@class="tlid-translation translation"]'
    page_url = 'https://translate.google.cn/#view=home&op=translate&sl=auto&tl=%s' % target

    # Navigate only when the browser is not already on this exact URL.
    if browser.current_url != page_url:
        browser.get(page_url)

    source_box = WebDriverWait(browser, 10).until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="source"]'))
    )
    source_box.clear()
    source_box.send_keys(input)

    # NOTE(review): this only waits for the span to be present. If a span
    # from a previous call is still in the DOM, the wait may return before
    # the new translation is rendered -- confirm against the live page.
    WebDriverWait(browser, 10).until(
        EC.presence_of_element_located((By.XPATH, result_xpath))
    )

    tree = etree.HTML(browser.page_source)
    text_nodes = tree.xpath(result_xpath + '//text()')
    return text_nodes[0]
if __name__ == '__main__':
    # Smoke test: repeat three sample translations 100 times to check that
    # the browser-driven approach keeps working across many requests.
    try:
        for _ in range(100):
            print(translate('中英翻译测试', target='en'))
            print(translate('再测试一下', target='en'))
            print(translate('hello world', target='zh-CN'))
    finally:
        # Always shut the browser down, even when a translation fails, so
        # that no headless Chrome process is left running.
        browser.quit()