```python
from bs4 import BeautifulSoup
import requests
import time # 导入相应的库文件
headers={
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Edg/88.0.705.50'
}
def judgment_sex(class_name):
if class_name==['member_icol']:
return '女'
else:
return '男' # 定义判断用户性别的函数
def get_links(url): # 定义获取详细页的url的函数
wb_data=requests.get(url,headers=headers)
soup=BeautifulSoup(wb_data.text,'lxml')
links=soup.select('page_list>ul>li>a')
for link in links:
href=link.get('href')
get_links(href) # 循环出的url函数,一次调用get_info函数
def get_info(url):
wb_data=requests.get(url,headers=headers)
soup=BeautifulSoup(wb_data.text,'lxml')
titles=soup.select('div.pho_info>h4')
addresses=soup.select('span.pr5')
prices=soup.select("#pricePart>div.day_1>sapn")
imgs=soup.select('#floatRightBox>div.js_box.clearfix>div.member_pic>a>img')
names=soup.select("#floatRightBox>div.js_box.clearfix>div.w_240>h6>a")
sexs=soup.select('#floatRightBox>div.js_box.clearfix>div.member_pic>div')
for title,address,price,img,name,sex in zip(titles,addresses,prices,imgs,names,sexs):
data={
'title':title.get_text().strip(),
'address': address.get_text().strip(),
'price':price.get_text().strip(),
'img':img.get('src'),
'name':name.get_text(),
'sex':judgment_sex(sex.get('class'))
}
print(data) # 获取信息并通过字典的形式打印出来
if __name__=='__main__': # 程序的主入口
for i in range(1,2):
urls=['http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(i)] # 构造多页url
for single_url in urls: # 循环调用get_links()函数
get_links(single_url)
time.sleep(2) # 睡眠2秒
``按照书上写的敲了一遍,代码基本一样的运行后就却只显示正常退出,没有爬到的数据,小白想了好多代码的问题都想不通,想请教各位大神,为什么是这样啊。是代码哪里错了么,感谢啦!!
运行后的结果,什么都没有啊,脑壳疼。