import requests
def crawl():
    """Prompt for a keyword, fetch the Baidu results page and save it as HTML.

    The keyword is read from standard input and sent as the ``word`` query
    parameter to ``https://www.baidu.com/s``. The response body is written,
    UTF-8 encoded, to ``./<keyword>.html`` in the current working directory.
    Characters that are reserved in file names are replaced with ``_`` in the
    file name only; the search itself uses the keyword exactly as typed.

    Request failures (connection errors, timeouts, HTTP error statuses) are
    reported on standard output; no file is written in that case.
    """
    # Search endpoint.
    url = 'https://www.baidu.com/s'
    # Read the search keyword interactively.
    kw = input('请输入搜索内容:')
    params = {
        'word': kw,
    }
    # Send a browser User-Agent so the request looks like it comes from a browser.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.197.400 QQBrowser/11.6.5265.400',
    }
    try:
        # Without a timeout, requests may block forever on a stalled connection.
        response = requests.get(url=url, params=params, headers=headers, timeout=10)
        # Do not store an HTTP error page as if it were a search result.
        response.raise_for_status()
    except requests.RequestException as exc:
        print(kw + '内容爬取失败: ' + str(exc))
        return
    # Path separators and other reserved characters in the keyword would make
    # open() fail or place the file outside the current directory.
    safe_name = ''.join('_' if ch in '\\/:*?"<>|' else ch for ch in kw)
    file_name = './' + safe_name + '.html'
    # The file is only written, never read back, so plain 'w' is sufficient.
    with open(file=file_name, mode='w', encoding='utf-8') as f:
        f.write(response.text)
    print(kw + '内容爬取完毕!')
if __name__ == '__main__':
    # Run one crawl, then repeat for as long as the user answers 'y' or 'Y'.
    keep_searching = True
    while keep_searching:
        crawl()
        reply = input('是否继续搜索?y或n' + '\n')
        # Any answer other than 'y' / 'Y' ends the session.
        keep_searching = reply in ('Y', 'y')
# Python爬虫之页面采集
# 最新推荐文章于 2023-07-28 15:05:39 发布