import requests
from lxml import etree
from day03.pymysql_text import Mysql_text
# Wrap the scrape of a single listing page in one function.
def lainjia(url):
    """Scrape one Lianjia subway-rental listing page and persist each listing.

    Fetches *url*, parses the ``<ul id="house-lst">`` listing list, extracts
    title / region / zone / orientation / price from every ``<li>``, prints
    each field, and inserts one row per listing through the module-level
    ``m`` (Mysql_text helper) using the module-level ``sql`` statement.

    NOTE(review): relies on the exact current page layout — every ``[0]``
    index raises IndexError if an xpath matches nothing.
    """
    # Timeout so one hung connection cannot stall the whole crawl.
    response = requests.get(url, timeout=10)
    # with open('lianjia.html','wb') as f:
    #     f.write(response.content)
    # Build an lxml element tree from the HTML text.
    lj_ele = etree.HTML(response.text)
    # All listing <li> nodes under the main results list.
    lj_list = lj_ele.xpath('//ul[@id="house-lst"]/li')
    # Walk every listing and pull out the fields we want.
    for li_list in lj_list:
        li_title = li_list.xpath('./div[2]/h2/a')[0].text
        print(li_title)
        li_region = li_list.xpath('./div[2]/div[1]/div[1]/a/span')[0].text
        print(li_region)
        li_zone = li_list.xpath('./div[2]/div[1]/div[1]/span[1]/span')[0].text
        print(li_zone)
        # dx: presumably the orientation/direction field — TODO confirm.
        li_dx = li_list.xpath('./div[2]/div[1]/div[1]/span[2]')[0].text
        print(li_dx)
        li_price = li_list.xpath('./div[2]/div[2]/div[1]/span[1]')[0].text
        print(li_price)
        # Pack the scraped fields into a tuple matching the sql placeholders.
        data = (li_title, li_region, li_zone, li_dx, li_price)
        # Execute the parameterized insert via the shared Mysql_text instance.
        m.sqlzz(sql, data)
# Instantiate the DB helper (the Mysql_text class from an earlier blog post)
# and the parameterized insert statement shared with lainjia() above.
m = Mysql_text()
sql = 'insert into lianjia(title,region,zone,dx,price) VALUE (%s,%s,%s,%s,%s)'
# Crawl result pages 1 through 3 of the Beijing subway-rental listings.
for i in range(1, 4):
    url = 'https://bj.lianjia.com/ditiezufang/pg%srp1/' % i
    lainjia(url)
# A simple scraper for Lianjia (链家) rental-listing information.
# (Blog footer residue: latest recommended article published 2024-10-18 14:25:40.)