1.打开Anaconda Prompt
创建虚拟环境,python 3.6.2,切换虚拟环境并安装requests库
conda create -n spider_learn python=3.6.2(python版本自己指定)
conda activate spider_learn
conda install requests
2.使用GET方式抓取数据
import requests

# Page to fetch with a plain GET request.
url = 'https://www.discovermagazine.com/'
# GET the page. `requests` has NO default timeout, so without one a dead
# server would hang this script forever.
strhtml = requests.get(url=url, timeout=10)
# Fail fast on HTTP errors (4xx/5xx) instead of printing an error page
# as if it were real content.
strhtml.raise_for_status()
# `.text` is the response body decoded to str.
print(strhtml.text)
3.使用POST方式抓取数据
# POST方式只需把 get 换成 post 并传入表单数据,其余步骤与GET相同:
# strhtml = requests.post(url, data=payload)
4.使用BeautifulSoup解析网页
import requests
from bs4 import BeautifulSoup

# Baidu hot-search board (PC entry point).
url = 'https://top.baidu.com/board?platform=pc&sa=pcindex_entry'
# No default timeout in `requests` -- always pass one so the script
# cannot hang on an unresponsive server.
strhtml = requests.get(url, timeout=10)
# Surface HTTP errors (4xx/5xx) immediately instead of parsing an error page.
strhtml.raise_for_status()
# Chinese pages often declare the wrong charset in the HTTP header;
# `apparent_encoding` sniffs the encoding from the body to avoid mojibake.
strhtml.encoding = strhtml.apparent_encoding
# Parse the HTML document with the lxml parser.
soup = BeautifulSoup(strhtml.text, 'lxml')
# NOTE(review): this selector was copied from browser dev-tools; the
# class-name suffixes (e.g. _1nNog) are build hashes and will break when
# Baidu redeploys -- verify against the live page before relying on it.
data = soup.select('#sanRoot > main > div.hot-wrap_1nNog > div.theme-hot.category-item_1fzJW > div.list_1EDla > a:nth-child(2) > div.normal_1fQqB > div.content-wrap_1RisM > div > div')
print(data)