import requests
from bs4 import BeautifulSoup
def _select_text(soup, selector, index=0):
    """Return the text of the ``index``-th element matching ``selector``.

    Returns None when the selector matches too few elements, so one missing
    field does not abort the whole scrape with an IndexError.
    """
    matches = soup.select(selector)
    try:
        return matches[index].text
    except IndexError:
        return None


def get_time_info(url):
    """Fetch the item detail page at ``url`` and print its scraped fields.

    Args:
        url: Address of the detail page to download and parse.

    Returns:
        A dict with the keys 'title', 'views', 'price', 'area' and 'cate'.
        Each value is the text of the matched element, or None when the
        page has no element for that selector. The same dict is printed.

    Raises:
        requests.exceptions.RequestException: if the download fails or
            takes longer than the timeout.
    """
    # Use the URL supplied by the caller; the previous version overwrote it
    # with a hard-coded address, so every call scraped the same page.
    # A timeout keeps a stalled server from hanging the crawl forever.
    wb_data = requests.get(url, timeout=10)
    soup = BeautifulSoup(wb_data.text, "lxml")

    # Selectors are kept exactly as originally written.
    # NOTE(review): the spellings 'info_titile' and 'palce_li' presumably
    # mirror the page's own class names -- confirm against the live markup.
    data = {
        'title': _select_text(soup, "h1.info_titile"),
        'views': _select_text(soup, 'span.look_time'),
        'price': _select_text(soup, 'span.price_now > i'),
        'area': _select_text(soup, 'div.palce_li > span > i'),
        # The category is taken from the last matching link.
        'cate': _select_text(soup, 'span.crb_i > a', index=-1),
    }
    print(data)
    return data
def get_all_items_info():
    """Scrape every Zhuanzhuan item linked from the listing page.

    Downloads http://hz.58.com/bijiben/, collects the anchors matching the
    CSS selector ``a.t`` and calls ``get_time_info`` on each link whose
    address contains the substring 'zhuanzhuan'.

    Raises:
        requests.exceptions.RequestException: if the listing page cannot be
            downloaded or the request exceeds the timeout.
    """
    url = "http://hz.58.com/bijiben/"
    # A timeout keeps a stalled server from hanging the crawl forever.
    wb_data = requests.get(url, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')

    for anchor in soup.select('a.t'):
        link = anchor.get('href')
        # Tag.get returns None for an anchor without an href attribute;
        # skip those instead of raising TypeError on the membership test.
        if link and 'zhuanzhuan' in link:
            get_time_info(link)
# Script entry point: this top-level call starts the crawl as soon as the
# module is executed (it also runs if the module is imported).
get_all_items_info()
# Scraping the Zhuanzhuan (转转) website
# (Blog page footer: latest recommended article published 2024-08-16 09:15:24)