Automating Youdao Translation with a Python Crawler

Automatically translating English papers with Youdao:
Code 1:

import urllib.request
import urllib.parse
import json

# Youdao translation crawler
def fanyi(context=''):
    url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
    data = {}
    data['type'] = 'AUTO'        # auto-detect the source language
    data['i'] = context          # the text to translate
    data['doctype'] = 'json'     # ask for a JSON response
    data['version'] = '2.1'
    data['keyfrom'] = 'fanyi.web'
    data['ue'] = 'UTF-8'
    data['typoResult'] = 'true'

    # Spoof Referer and User-Agent so the request looks like a browser
    head = {}
    head['Referer'] = 'http://fanyi.youdao.com/?keyfrom=dict2.top'
    head['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'

    data = urllib.parse.urlencode(data).encode('utf-8')
    req = urllib.request.Request(url, data, head)
    response = urllib.request.urlopen(req)
    html = response.read().decode('utf-8')

    html = json.loads(html)
    return html['translateResult'][0][0]['tgt']

# Read the source text and translate it sentence by sentence
f_r = open('./翻译/origin.txt', 'r', encoding='utf-8')
f_w = open('./翻译/result.txt', 'a', encoding='utf-8')
context = f_r.read()
context_list = context.split('。')
for sent in context_list:
    if not sent.strip():        # skip the empty tail that split() leaves behind
        continue
    sen_str = sent + '。'
    fanyi_sen_str = fanyi(sen_str)
    f_w.write(fanyi_sen_str + '\n')
f_r.close()
f_w.close()
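
Since this hits an undocumented endpoint, requests can fail, get rate-limited, or return unexpected JSON at any time. A minimal retry wrapper might look like the following sketch (the helper name fanyi_safe and its parameters are mine, not part of the original code):

import json
import time
import urllib.error

def fanyi_safe(context='', retries=3, delay=5):
    # Sketch: assumes fanyi() from Code 1 is already defined in scope
    for attempt in range(retries):
        try:
            return fanyi(context)
        except (urllib.error.URLError, KeyError, json.JSONDecodeError):
            if attempt == retries - 1:
                raise              # give up after the last attempt
            time.sleep(delay)      # back off before retrying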


Code 2:

import urllib.request
import urllib.parse
import json
import time
# Interactive translation with a delay between requests
while True:
    url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
    content = input('Enter the text to translate (type "q" to quit): ')
    if content == 'q':
        break
    data = {}
    data['type'] = 'AUTO'
    data['i'] = content
    data['doctype'] = 'json'
    data['version'] = '2.1'
    data['keyfrom'] = 'fanyi.web'
    data['ue'] = 'UTF-8'
    data['typoResult'] = 'true'
    data = urllib.parse.urlencode(data).encode('utf-8')
    req = urllib.request.Request(url, data)
    req.add_header('Referer', 'http://fanyi.youdao.com/?keyfrom=dict2.top')
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36')
    response = urllib.request.urlopen(req)
    html = response.read().decode('utf-8')
    html = json.loads(html)
    print('Translation result: %s' % html['translateResult'][0][0]['tgt'])
    time.sleep(5)  # wait 5 seconds so repeated requests are not flagged
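
Code 2 duplicates the request logic inline; assuming the fanyi function from Code 1 is in scope, the same interactive loop can be written more compactly, as in this sketch:

import time

# Sketch: interactive loop reusing fanyi() from Code 1
while True:
    content = input('Enter the text to translate (type "q" to quit): ')
    if content == 'q':
        break
    print('Translation result: %s' % fanyi(content))
    time.sleep(5)  # throttle requests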
Code 3:

import urllib.request
import urllib.parse
from bs4 import BeautifulSoup
import re

# Keyword search on Baidu Baike; also works around the fact that
# urllib.request.urlopen(url) fails on URLs containing Chinese characters
def main():
    keyword = input('Enter a keyword: ')
    keyword = urllib.parse.urlencode({'word': keyword})
    response = urllib.request.urlopen('https://baike.baidu.com/search/word?%s' % keyword)
    html = response.read()
    soup = BeautifulSoup(html, 'html.parser')

    # Collect every link whose href contains 'item' (Baike entry pages)
    for each in soup.find_all(href=re.compile('item')):
        content = ''.join([each.text])
        url2 = ''.join(['https://baike.baidu.com', each['href']])
        url2_origin = url2
        # The URL must be re-encoded: percent-quote the non-ASCII characters
        # while keeping the URL's structural characters intact
        url2 = urllib.parse.quote(url2, safe=':/=?#')
        response2 = urllib.request.urlopen(url2)
        html2 = response2.read()
        soup2 = BeautifulSoup(html2, 'html.parser')
        if soup2.h2:
            content = ''.join([content, soup2.h2.text])
        content = ''.join([content, '->', url2, '|||||->', url2_origin])
        print(content)
        print('***************************************************')

if __name__ == '__main__':
    main()
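
The key fix in Code 3 is urllib.parse.quote(url, safe=':/=?#'): it percent-encodes the Chinese characters in the path while leaving the URL's structural characters alone, so urlopen no longer raises UnicodeEncodeError. A quick illustration (the entry name is an arbitrary example):

import urllib.parse

url = 'https://baike.baidu.com/item/苹果'
print(urllib.parse.quote(url, safe=':/=?#'))
# -> https://baike.baidu.com/item/%E8%8B%B9%E6%9E%9C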
