import re,requests
from bs4 import BeautifulSoup
# Matches one whitespace character; compiled once and reused for every listing.
_WHITESPACE_RE = re.compile(r"\s")
# The details line of a listing is located by the square-metre sign it contains.
_AREA_RE = re.compile("㎡")
# Placeholder reported when a listing card has no promotion element.
_NO_DISCOUNT = "暂无优惠"
# Seconds to wait for the server before giving up instead of hanging forever.
_REQUEST_TIMEOUT = 10


def _strip_whitespace(text):
    """Return *text* with every whitespace character removed."""
    return _WHITESPACE_RE.sub("", text)


def _parse_house(card):
    """Extract the fields of one listing card (a ``div.r_lbx`` tag) into a dict.

    A field whose element is absent from the card is reported as ``None``;
    a missing promotion element is reported as the "暂无优惠" placeholder.
    """
    # Look the title container up once; both the name and the URL hang off its link.
    title_box = card.find("div", attrs={"class": "r_lbx_cena"})
    link = title_box.a if title_box is not None else None

    price_tag = card.find("span", attrs={"class": "ty_b"})

    # The station text is the first node following the opening "sub_img" tag.
    # Only call strip() when that node really is text (NavigableString is a str).
    station_box = card.find("div", attrs={"class": "sub_img"})
    station_node = station_box.next_element if station_box is not None else None

    details = card.find(string=_AREA_RE)

    discount_tag = card.find("div", attrs={"class": "new-price-link"})
    if discount_tag is None:
        discount = _NO_DISCOUNT
    else:
        # Drop the trailing two characters of the promo label (presumably a
        # fixed suffix -- confirm against the live markup).
        discount = _strip_whitespace(discount_tag.text)[:-2]

    return {
        "name": link.get("title") if link is not None else None,
        "url": link.get("href") if link is not None else None,
        "price": price_tag.text.strip() if price_tag is not None else None,
        "station": station_node.strip() if isinstance(station_node, str) else None,
        "details": _strip_whitespace(details) if details is not None else None,
        "discount": discount,
    }


def get_page_info(page=1):
    """Fetch one page of the Shanghai room listing and print each discount.

    Args:
        page: Number of the result page to request (sent as ``?page=``).

    Returns:
        A list with one dict per listing card found on the page, with the
        keys ``name``, ``url``, ``price``, ``station``, ``details`` and
        ``discount``. The list is empty when the page has no listing cards.

    Raises:
        requests.RequestException: If the request times out, the connection
            fails, or the server answers with an HTTP error status.
    """
    url = f"https://www.danke.com/room/sh?page={page}"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36",
        "Referer": "https://www.danke.com/sh",
    }
    response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    # Fail loudly on an error status instead of silently parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    houses = []
    for card in soup.find_all("div", attrs={"class": "r_lbx"}):
        house = _parse_house(card)
        print(house["discount"])
        houses.append(house)
    return houses
# Scrape the first four result pages, one request per page, in order.
for page in (1, 2, 3, 4):
    get_page_info(page)
C:\Anaconda3\python.exe D:/AI/01-AI基础/AI-3-Python高级/01爬虫PPT/day3_lxml和动态请求/demo1-蛋壳公寓BeautifulSoup.py
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
暂无优惠
Process finished with exit code 0