from urllib import request,parse
from urllib.error import HTTPError,URLError
import json
def post(url, form=None):
    """Submit *form* to *url* via ``urlrequest`` and return what it returns.

    Args:
        url: Address to request.
        form: Optional mapping of form fields, forwarded unchanged to
            ``urlrequest`` as its ``form`` argument.

    Returns:
        Whatever ``urlrequest(url, form=form)`` returns.
    """
    response_body = urlrequest(url, form=form)
    return response_body
def get(url, headers=None):
    """Request *url* via ``urlrequest`` without form data.

    Args:
        url: Address to request.
        headers: Optional mapping of request headers, forwarded to
            ``urlrequest`` as its ``headers`` argument.

    Returns:
        Whatever ``urlrequest(url, headers=headers)`` returns.
    """
    # Forward the caller's headers; the previous code passed a literal None
    # here, so any custom headers were silently dropped.
    return urlrequest(url, headers=headers)
def urlrequest(url, headers=None, form=None):
    """Fetch *url* and return the raw response body as bytes.

    Args:
        url: Address to request.
        headers: Optional mapping of request headers. When it is ``None``
            or empty, a single desktop-Chrome ``User-Agent`` header is
            sent instead.
        form: Optional mapping of form fields. When non-empty it is
            URL-encoded, encoded as UTF-8 and attached as the request
            body; otherwise the request carries no body.

    Returns:
        The bytes read from the response, or ``b''`` if the request fails
        with ``URLError`` (which includes ``HTTPError``). The error is
        printed rather than raised.
    """
    if not headers:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
        }

    html = b''
    try:
        if form:
            form_bytes = parse.urlencode(form).encode('utf-8')
            req = request.Request(url, headers=headers, data=form_bytes)
        else:
            req = request.Request(url, headers=headers)
        # Close the response deterministically instead of leaking it.
        with request.urlopen(req) as response:
            html = response.read()
    except URLError as e:
        # HTTPError subclasses URLError, so this one handler covers both.
        # Failures stay best-effort: report and fall through to b''.
        print(e)
    return html
if __name__ == '__main__':
    # Demo: look up a keyword through the suggestion endpoint, print the
    # first suggestion's 'v' field, and save the raw response to disk.
    # The request is sent once and its bytes are reused for both steps
    # (previously the identical POST was issued a second time for the file).
    neirong = post('http://fanyi.baidu.com/sug', form={'kw': '呵呵'})
    jieguo = json.loads(neirong.decode('utf-8'))
    print(jieguo['data'][0]['v'])
    with open('fanyi.html', 'wb') as f:
        f.write(neirong)
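# Basic Python crawler helpers: wrappers that fetch web pages with separate post and get methods.
# (Source page note: latest recommended article published 2022-01-28 21:08:40.)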