# Scrape listing pages 1-3: fetch each page, pull the fields of every listing
# out of the HTML with XPath, print them, and hand each row to the DB helper.
# `base_url`, `sql` and `myhelper` are expected to be defined earlier in the file.
for i in range(1, 4):
    # Insert the current page number into the URL template defined above.
    url = base_url % i
    # Fetch the page. An explicit timeout keeps a stalled connection from
    # blocking the whole scrape forever (requests has no default timeout).
    response = requests.get(url, timeout=10)
    # Parse the returned HTML text into an lxml element tree.
    html_ele = etree.HTML(response.text)
    # Locate every listing entry: the <li> children of <ul id="house-lst">.
    li_list = html_ele.xpath('//ul[@id="house-lst"]/li')
    for li_ele in li_list:
        # Each lookup takes the first XPath match; `[0]` raises IndexError if
        # the expected node is missing from this listing's markup.
        title = li_ele.xpath('./div[2]/h2/a')[0].text
        print(title)
        region = li_ele.xpath('./div[2]/div[1]/div[1]/a/span')[0].text
        print(region)
        zone = li_ele.xpath('./div[2]/div[1]/div[1]/span[1]/span')[0].text
        print(zone)
        meters = li_ele.xpath('./div[2]/div[1]/div[1]/span[2]')[0].text
        print(meters)
        location = li_ele.xpath('./div[2]/div[1]/div[1]/span[3]')[0].text
        print(location)
        price = li_ele.xpath('.//div[@class="price"]/span')[0].text
        print(price)
        # Store the row through the DB helper (per the original note, a MySQL
        # database); the tuple order must match the placeholders in `sql`.
        data = (title, region, zone, meters, location, price)
        myhelper.execute_modify_sql(sql, data)