引入库
1. requests:用来获取页面内容
https://requests.readthedocs.io/zh_CN/latest/
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple requests
2.BeautifulSoup
https://beautifulsoup.readthedocs.io/zh_CN/v4.4.0/
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple bs4 lxml
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Fetch the Lianjia rental list page and collect the listing links on it.
url = "https://bj.lianjia.com/zufang/"
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=10)
# Bare expression: shows the response when run interactively, no-op in a script.
response
# Fail loudly on an HTTP error instead of parsing an error page.
response.raise_for_status()
soup = BeautifulSoup(response.text, "lxml")
# Each listing card is a <div class="content__list--item">.
link_div = soup.find_all('div', class_='content__list--item')
# Resolve every card's href against the page URL so absolute hrefs are kept
# intact, and skip cards that have no <a> tag or no href attribute.
links = [
    urljoin(url, div.a.get('href'))
    for div in link_div
    if div.a is not None and div.a.get('href')
]
# Bare expression: shows the collected links when run interactively.
links
def get_page(url: str, timeout: float = 10) -> "BeautifulSoup":
    """Download *url* and return its HTML parsed with the lxml parser.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block indefinitely.

    Returns:
        A ``BeautifulSoup`` tree built from the response text.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Surface 4xx/5xx responses here rather than parsing an error page and
    # failing later with a confusing "element not found" error.
    response.raise_for_status()
    return BeautifulSoup(response.text, "lxml")
def get_links(links_url: str) -> list:
    """Return the absolute URLs of the listings found on a list page.

    Args:
        links_url: URL of the list page to scan, e.g.
            "https://bj.lianjia.com/zufang/".

    Returns:
        A list of absolute URL strings, one per
        ``<div class="content__list--item">`` card that contains a link,
        in page order.
    """
    soup = get_page(links_url)
    cards = soup.find_all('div', class_='content__list--item')
    links = []
    for card in cards:
        anchor = card.a
        href = anchor.get('href') if anchor is not None else None
        if not href:
            # A card without an <a> tag or href has nothing to link to;
            # skipping it avoids an AttributeError/TypeError.
            continue
        # Resolve against the page URL instead of a hard-coded host so list
        # pages on other hosts and already-absolute hrefs come out correct.
        links.append(urljoin(links_url, href))
    return links
# Fetch and parse the detail page of a single listing.
house_url = "https://bj.lianjia.com/zufang/BJ2600620531699433472.html"
soup = get_page(house_url)

# The price is the text of the <span> inside the "content__aside--title" div.
title_block = soup.find('div', class_='content__aside--title')
price = title_block.span.text

# Bare expression: when run interactively, shows the text of the second <li>
# in the "content__aside__list" list; it has no effect in a plain script.
info_items = soup.find('ul', class_='content__aside__list').find_all('li')
info_items[1].text