import requests
import os
#指定搜索关键字
word = input('enter a word you want to search:')
#自定义请求头信息
headers={
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
}
#指定url
url = 'https://www.sogou.com/web'
#封装get请求参数
prams = {
'query':word,
'ie':'utf-8'
}
#发起请求
response = requests.get(url=url,params=param)
#获取响应数据
page_text = response.text
with open('./sougou.html','w',encoding='utf-8') as fp:
fp.write(page_text)
需求:登录豆瓣电影,爬取登录成功后的页面数据
import requests
import os
url = 'https://accounts.douban.com/login'
#封装请求参数
data = {
"source": "movie",
"redir": "https://movie.douban.com/",
"form_email": "15027900535",
"form_password": "bobo@15027900535",
"login": "登录",
}
#自定义请求头信息
headers={
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
}
response = requests.post(url=url,data=data)
page_text = response.text
with open('./douban111.html','w',encoding='utf-8') as fp:
fp.write(page_text)
import requests
import os
#指定搜索关键字
word = input('enter a word you want to search:')
#指定起始页码
start_page = int(input('enter start page num:'))
end_page = int(input('enter end page num:'))
#自定义请求头信息
headers={
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
}
#指定url
url = 'https://zhihu.sogou.com/zhihu'
#创建文件夹
if not os.path.exists('./sougou'):
os.mkdir('./sougou')
for page in range(start_page,end_page+1):
#封装get请求参数
params = {
'query':word,
'ie':'utf-8',
'page':str(page)
}
#发起post请求,获取响应对象
response = requests.get(url=url,params=params)
#获取页面数据
page_text = response.text
fileName = word+'_'+str(page)+'.html'
filePath = './sougou/'+fileName
with open(filePath,'w',encoding='utf-8') as fp:
fp.write(page_text)
print('爬取'+str(page)+'页结束')