利用xpath爬取5i5j租房信息并保存到数据库

最新推荐文章于 2021-01-17 13:46:46 发布

TheSkies

最新推荐文章于 2021-01-17 13:46:46 发布

阅读量997

点赞数

分类专栏：基础爬虫 文章标签：爬取租房信息

本文链接：https://blog.csdn.net/weixin_38920937/article/details/81813336

版权

基础爬虫专栏收录该内容

16 篇文章 0 订阅

订阅专栏

import time
from urllib import request
from urllib.parse import urljoin

import pymysql
import requests
from lxml import etree


class MyMysql(object):
    """Small helper that owns one PyMySQL connection and one cursor.

    The connection is opened in ``__init__`` and released by ``close()``
    (also invoked from ``__del__`` as a last resort).
    """

    def __init__(self):
        # Pass the settings by keyword: PyMySQL 1.0 made the arguments of
        # connect() keyword-only, so the positional form fails there.
        self.db = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='******',
            database='wang',
        )
        self.cursor = self.db.cursor()

    def excute_sql(self,sql,data):
        """Run one parameterized statement and commit it.

        The misspelled name is kept because callers use it.

        Args:
            sql: statement text containing ``%s`` placeholders.
            data: sequence of values bound to the placeholders.

        Raises:
            Whatever the cursor or connection raises; the transaction is
            rolled back before the exception propagates.
        """
        try:
            self.cursor.execute(sql,data)
            self.db.commit()
        except Exception:
            # Do not leave a half-applied transaction open on the connection.
            self.db.rollback()
            raise

    def close(self):
        """Close the cursor and the connection; safe to call repeatedly."""
        # getattr: __init__ may have failed before these attributes existed.
        cursor = getattr(self, 'cursor', None)
        db = getattr(self, 'db', None)
        self.cursor = None
        self.db = None
        try:
            if cursor is not None:
                cursor.close()
        finally:
            # Close the connection even if closing the cursor failed.
            if db is not None:
                db.close()

    def __del__(self):
        self.close()

# Parameterized INSERT for one listing row. The four %s placeholders are bound
# to the (title, info, price, url) tuple handed to MyMysql.excute_sql.
sql = 'insert into wuaiwujia(title,info,price,url) values(%s,%s,%s,%s)'
# Shared database helper; constructing it opens the MySQL connection right here,
# when the module is executed.
msq = MyMysql()

# Request headers for bj.5i5j.com; the User-Agent identifies as desktop Chrome 68.
# NOTE(review): the Cookie below is a hard-coded browser session (its embedded
# timestamps are from August 2018) — presumably expired; refresh it or load it
# from configuration instead of committing it to source.
headers = {
    'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    # Left disabled — presumably so the server does not reply with a
    # compression (e.g. br) the client cannot decode; TODO confirm.
    # 'Accept-Encoding':'gzip, deflate, br',
    'Accept-Language':'zh-CN,zh;q=0.9',
    'Connection':'keep-alive',
    'Cookie':'PHPSESSID=sce5m950d8s6j1rsolvurilqt6; domain=bj; _ga=GA1.2.458079001.1534494421; _gid=GA1.2.416906921.1534494421; yfx_c_g_u_id_10000001=_ck18081716270214385525539906157; yfx_f_l_v_t_10000001=f_t_1534494422429__r_t_1534494422429__v_t_1534494422429__r_c_0; Hm_lvt_94ed3d23572054a86ed341d64b267ec6=1534494427; _gat=1; _Jo0OQK=F470CFFFBC34B6B56771705270055E0944C01FF803A6D40EAC91C723C4D09E7306981EC7E98E891829B31B433D51F3A51012D4582E317819D88C240DD9D35088E68C57212F12283777C840763663251ADEB840763663251ADEBC088D32EA9E0F2ECA593CD374DB85252GJ1Z1Lw==; Hm_lpvt_94ed3d23572054a86ed341d64b267ec6=1534494548',
    'Host':'bj.5i5j.com',
    'Upgrade-Insecure-Requests':'1',
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
}
# Crawl result pages 1-3 of the Huilongguan rental listings on bj.5i5j.com and
# store one database row (title, info, price, absolute URL) per listing.
for i in range(1,4):
    url = 'https://bj.5i5j.com/zufang/huilongguan/n%s' % i
    # `headers` must be passed by keyword: the second positional parameter of
    # requests.get() is `params`, so the positional form appended the dict to
    # the query string and sent none of the custom headers.
    # The timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url,headers=headers,timeout=10)
    # Fail loudly on an HTTP error instead of parsing an error page as results.
    response.raise_for_status()
    html = response.text
    html_ele = etree.HTML(html)

    li_list = html_ele.xpath('//ul[@class="pList"]/li')
    for res in li_list:
        title_nodes = res.xpath('./div[2]/h3/a')
        info_nodes = res.xpath('./div[2]/div[1]/p[1]/text()')
        price_nodes = res.xpath('./div[2]/div[1]/div/p/strong')
        href_nodes = res.xpath('./div[2]/h3/a/@href')
        if not (title_nodes and info_nodes and price_nodes and href_nodes):
            # An <li> without the expected listing markup would otherwise
            # abort the whole crawl with IndexError; skip it instead.
            continue

        title = title_nodes[0].text
        print(title)
        info = info_nodes[0].replace(' ','')
        print(info)
        price = price_nodes[0].text
        print(price)
        # The href is resolved against the page URL to get an absolute link.
        fangyuan_url = urljoin(url,href_nodes[0])
        data = (title,info,price,fangyuan_url)
        msq.excute_sql(sql,data)
        # NOTE(review): this pause runs once per stored row, not once per HTTP
        # request — kept as in the original; confirm the intended throttling.
        time.sleep(2)
    print('第%s页保存完毕'%i)

TheSkies

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
1
评论
利用xpath爬取5i5j租房信息并保存到数据库

import requests; from lxml import etree; import time; from urllib import request; import pymysql; class MyMysql(object): def __init__(self): self.db = pymysql.connect('127.0.0.1','root','****...
复制链接

扫一扫