# 1. 爬取搜狗首页
import requests
# Download the Sogou home page, echo it to stdout, and save it to ./sogou.html.
url = 'https://www.sogou.com/'  # target URL
# requests applies no timeout by default; without one a stalled connection
# would block this script forever.
respone = requests.get(url=url, timeout=10)  # send the GET request
# Fail loudly on a 4xx/5xx answer instead of saving an error page as if it
# were the home page.
respone.raise_for_status()
page_text = respone.text  # response body decoded to str
print(page_text)
with open('./sogou.html', 'w', encoding='utf-8') as fp:
    fp.write(page_text)
print("爬取结束")
# 2. 爬取搜狗指定关键字的搜索结果
import requests
# Search Sogou for a keyword typed by the user and save the result page
# to "<keyword>.html" in the current directory.
headers = {
    # Send a browser User-Agent instead of the default python-requests one.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:87.0) Gecko/20100101 Firefox/87.0"
}
# Bare endpoint only: the query string is supplied through `param`.
# requests appends `params` to any query already present in the URL, so a
# hard-coded "?query=1" here would send "?query=1&query=<kw>".
url = 'https://www.sogou.com/web'
kw = input("enter word")
param = {
    'query': kw
}
# requests applies no timeout by default; without one a stalled connection
# would block this script forever.
respone = requests.get(url=url, headers=headers, params=param, timeout=10)
# Fail loudly on a 4xx/5xx answer instead of saving an error page.
respone.raise_for_status()
page_text = respone.text
# The keyword is free-form user input: replace characters that are path
# separators or are not allowed in file names so open() cannot fail on them
# or write outside the current directory.
safe_kw = "".join("_" if ch in '\\/:*?"<>|' else ch for ch in kw)
filename = safe_kw + '.html'
with open(filename, 'w', encoding='utf-8') as fp:
    fp.write(page_text)
print("保存成功")
# 3. 百度翻译爬虫