爬虫练习

5i5j

xpath练习

import requests
from lxml import etree
from  pymysql_1 import mysql_a


# Listing-page URL template for the Huilongguan rental section; %s is filled
# with the page number by the loop below.
base_url = 'https://bj.5i5j.com/zufang/huilongguan/n%s/'

# Project-local MySQL helper; rows are written through execute_modify_mysql.
pysql = mysql_a()


# Proxy map handed to requests. The previous value 'http:106.75.9.39:8080'
# was missing the '//' after the scheme and therefore was not a valid URL.
# NOTE(review): only the 'http' scheme is mapped while base_url is https, so
# these requests presumably do not go through the proxy -- confirm whether an
# 'https' entry is wanted.
p = {
    'http': 'http://106.75.9.39:8080'
}

sql = 'insert into 5j5j(title,mianji,address,price) values(%s,%s,%s,%s)'

# The headers are the same for every page, so build them once.
headers = {
    'Referer': 'https://bj.5i5j.com/zufang/huilongguan/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
}


def _first_text(node, path):
    """Return ``.text`` of the first element matching *path*, or None if no match."""
    matches = node.xpath(path)
    return matches[0].text if matches else None


# Crawl listing pages 1..10 and store one row per listing.
for i in range(1, 11):
    url = base_url % i

    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url, headers=headers, proxies=p, timeout=10)
    print(response)

    html_ele = etree.HTML(response.text)
    print(html_ele)
    list_li = html_ele.xpath('//ul[@class="pList"]/li')

    for item in list_li:
        # Title. An <li> without the title link is not a listing entry in the
        # expected layout; skip it instead of crashing on [0].
        title_nodes = item.xpath('./div[2]/h3/a')
        if not title_nodes:
            continue
        title = title_nodes[0].text

        # Area. text() yields a list of text nodes; collapse it to one string
        # so it can be bound to a single %s placeholder.
        # NOTE(review): assumes the helper binds each tuple item as one scalar
        # SQL parameter -- confirm against mysql_a.
        mianji_parts = item.xpath('./div[2]/div[1]/p[1]/text()')
        mianji = ' '.join(part.strip() for part in mianji_parts if part.strip())

        # Address (None when the element is absent).
        address = _first_text(item, './div[2]/div[1]/p[2]/a')
        # Price (None when the element is absent).
        price = _first_text(item, './div[2]/div[1]/div/p[1]/strong')

        data = (title, mianji, address, price)
        pysql.execute_modify_mysql(sql, data)



 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值