from urllib import request
from urllib import parse
# Build the Baidu search URL for a query.
def get_url(word):
    """Return the Baidu search-results URL for *word*.

    Args:
        word: The search keywords. They are percent-encoded by
            ``urllib.parse.urlencode`` (spaces become ``+``, non-ASCII
            text is UTF-8 percent-escaped).

    Returns:
        The full URL string, e.g.
        ``https://www.baidu.com/s?wd=python&usm=3&rsv_idx=2&rsv_page=1``.
    """
    baseurl = "https://www.baidu.com/s?"
    # Encode every parameter in one call instead of appending a hand-built
    # "&usm=3..." suffix. Per the original author's note, usm / rsv_idx /
    # rsv_page are included to get around Baidu's security-verification
    # page when requesting from Python.
    params = parse.urlencode(
        {"wd": word, "usm": 3, "rsv_idx": 2, "rsv_page": 1}
    )
    return baseurl + params
# Request the page and save it to disk.
def write_html(url, word):
    """Download *url* and save the response body as ``<word>.html``.

    Args:
        url: The address to fetch.
        word: Used as the output file's base name; ``".html"`` is appended.
            The value is used as-is, so it is interpreted as a path
            relative to the current working directory.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body cannot be decoded with the
            charset the response declares (UTF-8 when none is declared).
        OSError: If the output file cannot be written.
    """
    # Send a browser-like User-Agent instead of urllib's default one.
    headers = {"User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; Tablet PC 2.0; .NET4.0E)"}
    req = request.Request(url=url, headers=headers)  # build the request object
    # The context manager closes the response even if reading or decoding fails.
    with request.urlopen(req) as res:
        # Honour the charset from the Content-Type header; fall back to UTF-8.
        charset = res.headers.get_content_charset() or "utf-8"
        html = res.read().decode(charset)
    filename = word + ".html"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(html)
if __name__ == '__main__':
    # Script entry point: prompt for a query, build the search URL, then
    # download the results page to "<query>.html".
    word = input("请输入你要查询的内容:").strip()
    if not word:
        # An empty query would fetch a meaningless page and write ".html".
        raise SystemExit("查询内容不能为空")
    url = get_url(word)
    write_html(url, word)
# Basic web-crawler template.
# (Source-page footer: latest recommended article published 2021-02-01 08:08:45.)