import re
import requests
from io import BytesIO
import ddddocr
from lxml import etree
def get_yellow_price(img_url: str) -> str:
    """Download the yellow price sprite image and OCR the text it shows.

    Args:
        img_url: Absolute URL of the sprite image to download.

    Returns:
        The string recognised by ddddocr for the image. The caller indexes
        into this string to map a sprite offset to a digit.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps the script from hanging forever on a stalled
    # connection; raise_for_status stops an HTML error page from being
    # passed to the OCR engine as if it were the sprite.
    response = requests.get(img_url, timeout=10)
    response.raise_for_status()

    ocr = ddddocr.DdddOcr(beta=True, show_ad=False)
    # classification() takes the raw image bytes directly, so no BytesIO
    # wrapper is needed.
    return ocr.classification(response.content)
# Digit order of the red price sprite; the original author notes it is fixed.
_RED_DIGITS = '8652039147'
# Horizontal distance (px) between neighbouring digits in each sprite.
_RED_DIGIT_STEP = 20
_YELLOW_DIGIT_STEP = 21.4

# Horizontal sprite offset inside an inline style. The minus sign is optional
# so that a plain "0px" (first digit) is accepted as well as "-0px".
_OFFSET_RE = re.compile(r'background-position:\s*-?([\d.]+)px')
# Protocol-relative URL of the yellow price sprite embedded in the page.
_SPRITE_URL_RE = re.compile(
    r'//static8\.ziroom\.com/phoenix/pc/images/price/new-list/.*?\.png'
)


def _decode_price(styles, yellow_digits):
    """Turn the per-digit inline styles of one listing into a price string.

    Args:
        styles: Iterable of ``style`` attribute strings, one per price digit.
        yellow_digits: Digit sequence of the yellow sprite (from OCR).

    Returns:
        The decoded price as a string of digits ('' when ``styles`` is empty).

    Raises:
        ValueError: If a style has no parsable offset, or the offset points
            past the end of the digit table in use.
    """
    digits = []
    for style in styles:
        match = _OFFSET_RE.search(style)
        if match is None:
            raise ValueError(
                f'no background-position offset found in style: {style!r}'
            )
        offset = float(match.group(1))

        # Red digits use the fixed table; anything else is treated as the
        # yellow sprite, whose digit order comes from OCR.
        if 'red.png' in style:
            table, step = _RED_DIGITS, _RED_DIGIT_STEP
        else:
            table, step = yellow_digits, _YELLOW_DIGIT_STEP

        # round() instead of int(): offsets are expected to be multiples of
        # the step, and truncating a quotient such as 2.9999999999999996
        # would silently select the wrong digit.
        index = round(offset / step)
        if index >= len(table):
            raise ValueError(
                f'sprite offset {offset}px maps to index {index}, '
                f'but only {len(table)} digits are known ({table!r})'
            )
        digits.append(table[index])
    return ''.join(digits)


def main():
    """Fetch the Ziroom listing page and print each listing's title and price.

    Prices on the page are drawn as slices of a digit sprite image selected
    via CSS ``background-position``; this function resolves each slice back
    to its digit and prints one ``<title> ¥<price>/月`` line per listing.

    Raises:
        requests.RequestException: If the listing page or the sprite image
            cannot be downloaded.
        RuntimeError: If the yellow sprite URL is not present in the page.
        ValueError: If a price digit cannot be decoded.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
    }
    response = requests.get(
        'https://www.ziroom.com/z/', headers=headers, timeout=10
    )
    response.raise_for_status()
    page = response.text

    # Locate the yellow price sprite image.
    sprite = _SPRITE_URL_RE.search(page)
    if sprite is None:
        raise RuntimeError(
            'yellow price sprite URL not found in the listing page'
        )
    yellow_price = get_yellow_price('https:' + sprite.group())

    html = etree.HTML(page)
    div_list = html.xpath('//div[@class="Z_list-box"]/div')
    # Per the original author's note the 5th div is an advertisement; drop
    # it, but only when the page actually returned that many cards.
    if len(div_list) > 4:
        del div_list[4]

    for div in div_list:
        titles = div.xpath('./div[3]/h5/a/text()')
        if not titles:
            # No title at the expected position: not a listing card.
            continue
        styles = div.xpath('.//div[3]/div[2]/div/span[position()>1]/@style')
        price = _decode_price(styles, yellow_price)
        print(f"{titles[0]} ¥{price}/月")
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
自如网租房价格反爬:CSS 偏移
最新推荐文章于 2024-11-11 23:45:17 发布