# 这也可以算作爬虫的一个小应用吧,可以自己给自己做个英语词典
# -*- coding: UTF-8 -*-
import requests #导入requests包
from bs4 import BeautifulSoup
import sys
def basicDict(data):
    """Format the text of the last element in *data* as an indented listing.

    Each element of *data* must provide ``get_text()``; only the text of the
    last element is used (an empty *data* yields an empty string). Tabs are
    removed and the text is processed line by line:

    * empty lines, lines containing ``document.write``, ``{`` or ``}``, and
      lines containing a double quote are dropped;
    * a line containing ``:``, or containing ``.`` but no ``;``, is emitted
      as-is and restarts the numbering;
    * every other line is emitted with a running number prefix (``1.``,
      ``2.``, ...).

    Every emitted line is prefixed with one space and terminated by a
    newline. Returns the concatenated result as a string.
    """
    raw_text = ''
    for node in data:
        # Later matches overwrite earlier ones, so the last element wins.
        raw_text = node.get_text()

    pieces = []
    counter = 0
    for line in raw_text.replace('\t', '').split('\n'):
        if not line:
            continue
        # Skip leftovers of inline scripts embedded in the page text.
        if 'document.write' in line or '{' in line or '}' in line:
            continue
        if '"' in line:
            continue
        # ``and`` binds tighter than ``or``; the parentheses only make the
        # existing precedence explicit.
        restarts_numbering = ':' in line or ('.' in line and ';' not in line)
        if restarts_numbering:
            counter = 0
            pieces.append(' ' + line + '\n')
        else:
            counter += 1
            pieces.append(' %d.%s\n' % (counter, line))
    return ''.join(pieces)
def pronunce(data):
    """Return the text of the last element in *data*, or ``''`` if it is empty.

    Each element must provide ``get_text()``; the method is called on every
    element and the final result is the one returned.
    """
    text = ''
    for node in data:
        # Keep overwriting so that only the last element's text remains.
        text = node.get_text()
    return text
def dict(word):
    """Look up *word* on dict.cn and return a formatted plain-text entry.

    The entry is the word itself followed by the phonetic transcription, a
    newline, the basic (Chinese) meanings and the English definitions, the
    latter two formatted by ``basicDict``. Page sections that are missing
    simply contribute an empty string.

    NOTE: the function name shadows the builtin ``dict``; it is kept because
    callers in this file use it.

    Raises:
        requests.exceptions.RequestException: on network failure or when the
            server does not answer within the timeout.
    """
    # Pass the word through ``params`` so requests URL-encodes it; plain
    # string concatenation breaks on spaces, '&', '#' or non-ASCII input.
    # The timeout keeps one stalled connection from hanging a whole batch.
    strhtml = requests.get(
        'https://dict.cn/search',
        params={'q': word},
        timeout=10,
    )
    soup = BeautifulSoup(strhtml.text, 'lxml')
    # Phonetic transcription.
    data0 = soup.select(
        '#content > div.main > div.word > div.phonetic > span:nth-child(1) > bdo')
    # Chinese meanings.
    data1 = soup.select(
        '#content > div.main > div.word > div.basic.clearfix > ul')
    # English definitions.
    data2 = soup.select(
        '#content > div.main > div.section.def > div.layout.en')
    return word + pronunce(data0) + '\n' + basicDict(data1) + basicDict(data2)
# Command-line driver: read one word per line from the input file, look each
# word up with dict(), and write the formatted entries to the output file.
# Usage: <script> [input_file [output_file]]
ifileName = 'wordslist.txt'
ofileName = 'result.txt'
# Any other argument count falls back to the defaults above.
if len(sys.argv) in (2, 3):
    ifileName = sys.argv[1]
if len(sys.argv) == 3:
    ofileName = sys.argv[2]

# The file encoding must be stated explicitly. The ``with`` block guarantees
# both files are closed even if a lookup raises an unexpected exception.
with open(ifileName, mode='r', encoding='utf-8') as fi, \
        open(ofileName, mode='w', encoding='utf-8') as fo:
    for word in fi:
        word = word.strip()
        if not word:
            # Blank lines would otherwise trigger a lookup of an empty word.
            continue
        try:
            result = dict(word)
            fo.write(result)
        except ValueError as err:
            # Still best-effort (skip the word and carry on), but report the
            # failure instead of swallowing it silently.
            print('skipped %r: %s' % (word, err), file=sys.stderr)