Python爬虫教程(一)静态网页抓取
安装Requests
使用pip或者conda均可
pip install requests
conda install requests
获取响应内容
import requests
# Fetch the Baidu home page. The explicit timeout keeps the script from
# blocking forever if the server never answers.
r = requests.get("https://www.baidu.com", timeout=10)
# Encoding Requests will use to decode the body into r.text.
print("文本编码:", r.encoding)
# Numeric HTTP status code of the response.
print("响应状态码:", r.status_code)
# Decoded response body.
print("服务器响应内容:", r.text)
结果:
定制Requests
传递URL参数
定制请求头
发送POST请求
超时
超时设置:
实践(豆瓣TOP250所有电影名称)
import requests
from bs4 import BeautifulSoup
def getTop250():
    """Collect movie titles from the Douban Top 250 listing.

    Requests ten listing pages (``start`` offsets 0, 25, ..., 225) and, for
    every ``<div class="hd">`` on each page, takes the stripped text of the
    first ``<span>`` inside its first ``<a>`` (presumably the primary
    title -- confirm against the live page markup).

    Returns:
        list[str]: The collected titles, in the order they were encountered.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with an HTTP error status.
    """
    # The headers are identical for every page, so build them once.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36',
        'Host': 'movie.douban.com'
    }
    movie_list = []
    for i in range(0, 10):
        http = "https://movie.douban.com/top250?start=" + str(i * 25)
        # Without a timeout a stalled connection would hang the crawl forever.
        r = requests.get(http, headers=headers, timeout=10)
        # Fail loudly on an error page instead of silently parsing it and
        # returning an incomplete list.
        r.raise_for_status()
        # Parse the page with BeautifulSoup.
        soup = BeautifulSoup(r.text, "html.parser")
        # Named `title_blocks` rather than `list` to avoid shadowing the builtin.
        title_blocks = soup.find_all('div', class_='hd')
        for each in title_blocks:
            movie = each.a.span.text.strip()
            movie_list.append(movie)
    return movie_list
# Fetch every title, then print them as a 1-based ranked list.
movie_list = getTop250()
for index, movie in enumerate(movie_list, start=1):
    print(index, '. ', movie)