利用xpath爬取5i5j租房信息 并保存到数据库

import requests
from lxml import etree
import time
from urllib import request
import pymysql


class MyMysql(object):
    """Small helper that owns one PyMySQL connection and one cursor.

    The connection is opened in ``__init__`` and released by ``close()``
    (also invoked from ``__del__`` as a last resort).

    Args:
        host: MySQL server address.
        user: MySQL user name.
        password: Password for ``user``.
        database: Schema to select after connecting.
        charset: Connection character set. Set explicitly so non-ASCII
            text does not depend on the driver's default.
    """

    def __init__(self, host='127.0.0.1', user='root', password='******',
                 database='wang', charset='utf8mb4'):
        # Define both attributes first so close()/__del__ stay safe even when
        # connect() raises before they would otherwise be assigned.
        self.db = None
        self.cursor = None
        # Keyword arguments: PyMySQL 1.0+ no longer accepts positional
        # host/user/password/database.
        self.db = pymysql.connect(host=host, user=user, password=password,
                                  database=database, charset=charset)
        self.cursor = self.db.cursor()

    def excute_sql(self, sql, data):
        """Execute one parameterized statement and commit it.

        Args:
            sql: SQL text with driver placeholders (``%s``).
            data: Sequence of values bound to the placeholders.

        Raises:
            Exception: Whatever the cursor or connection raises; the
                transaction is rolled back before the error propagates.
        """
        try:
            self.cursor.execute(sql, data)
            self.db.commit()
        except Exception:
            # Do not leave a half-applied transaction open on the connection.
            self.db.rollback()
            raise

    # Correctly spelled alias; the original (misspelled) name is kept for callers.
    execute_sql = excute_sql

    def close(self):
        """Close the cursor and the connection; safe to call more than once."""
        cursor = getattr(self, 'cursor', None)
        db = getattr(self, 'db', None)
        # Clear the references first so a second call becomes a no-op.
        self.cursor = None
        self.db = None
        for resource in (cursor, db):
            if resource is None:
                continue
            try:
                resource.close()
            except Exception:
                # Best-effort cleanup: an already-closed or broken connection
                # must not turn teardown into a crash.
                pass

    def __del__(self):
        # Fallback only; callers should prefer an explicit close().
        self.close()

# Parameterized INSERT for one listing row (title, info, price, url); the
# %s placeholders are filled from the data tuple passed alongside it.
sql = (
    'insert into wuaiwujia(title,info,price,url) '
    'values(%s,%s,%s,%s)'
)
# Shared database helper used for every insert in this script.
msq = MyMysql()

# Header values mimicking a Chrome 68 browser session on bj.5i5j.com.
# NOTE(review): the Cookie is a captured browser session and will presumably
# go stale; refresh it if the site starts rejecting requests.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    # 'Accept-Encoding' is intentionally not set (it was disabled in the original).
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    # One entry per cookie, joined with the standard "; " separator.
    'Cookie': '; '.join((
        'PHPSESSID=sce5m950d8s6j1rsolvurilqt6',
        'domain=bj',
        '_ga=GA1.2.458079001.1534494421',
        '_gid=GA1.2.416906921.1534494421',
        'yfx_c_g_u_id_10000001=_ck18081716270214385525539906157',
        'yfx_f_l_v_t_10000001=f_t_1534494422429__r_t_1534494422429__v_t_1534494422429__r_c_0',
        'Hm_lvt_94ed3d23572054a86ed341d64b267ec6=1534494427',
        '_gat=1',
        '_Jo0OQK=F470CFFFBC34B6B56771705270055E0944C01FF803A6D40EAC91C723C4D09E7306981EC7E98E891829B31B433D51F3A51012D4582E317819D88C240DD9D35088E68C57212F12283777C840763663251ADEB840763663251ADEBC088D32EA9E0F2ECA593CD374DB85252GJ1Z1Lw==',
        'Hm_lpvt_94ed3d23572054a86ed341d64b267ec6=1534494548',
    )),
    'Host': 'bj.5i5j.com',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/68.0.3440.106 Safari/537.36'
    ),
}
# Crawl result pages 1-3 of the Huilongguan rental listings on bj.5i5j.com,
# print each listing's title / info / price and insert it into MySQL via `msq`.
for i in range(1, 4):
    url = 'https://bj.5i5j.com/zufang/huilongguan/n%s' % i
    # Pass the dict by keyword: the second positional parameter of
    # requests.get() is `params`, so a positional dict is sent as a query
    # string and the HTTP headers are never set. The timeout keeps a stalled
    # connection from hanging the script indefinitely.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page as "no listings".
    response.raise_for_status()
    html_ele = etree.HTML(response.text)

    li_list = html_ele.xpath('//ul[@class="pList"]/li')
    for res in li_list:
        title_nodes = res.xpath('./div[2]/h3/a')
        info_nodes = res.xpath('./div[2]/div[1]/p[1]/text()')
        price_nodes = res.xpath('./div[2]/div[1]/div/p/strong')
        href_nodes = res.xpath('./div[2]/h3/a/@href')
        if not (title_nodes and info_nodes and price_nodes and href_nodes):
            # An <li> without the expected listing markup: skip it rather
            # than abort the whole crawl with an IndexError.
            continue

        title = title_nodes[0].text
        print(title)
        info = info_nodes[0].replace(' ', '')
        print(info)
        price = price_nodes[0].text
        print(price)
        # Listing links may be relative; resolve them against the page URL.
        # NOTE(review): urljoin lives in urllib.parse and is reachable here
        # only because urllib.request imports it; consider importing it from
        # urllib.parse directly.
        fangyuan_url = request.urljoin(url, href_nodes[0])
        data = (title, info, price, fangyuan_url)
        msq.excute_sql(sql, data)
        # Pause after every stored listing (kept from the original pacing).
        time.sleep(2)
    print('第%s页保存完毕' % i)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值