import time
from urllib import request
from urllib.parse import urljoin

import pymysql
import requests
from lxml import etree
class MyMysql(object):
    """Thin wrapper around one PyMySQL connection and its cursor.

    The connection is opened in ``__init__`` and released by ``close()``
    (also invoked from ``__del__`` as a last-resort safety net).
    """

    def __init__(self):
        """Open the connection to the local ``wang`` database and create a cursor.

        Raises:
            pymysql.MySQLError: if the server cannot be reached or rejects
                the credentials.
        """
        # Pre-set both attributes so close()/__del__ stay safe even when
        # connect() raises before they would otherwise be assigned.
        self.db = None
        self.cursor = None
        # Keyword arguments are required: PyMySQL 1.0+ no longer accepts
        # positional arguments to connect().
        self.db = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='******',
            database='wang',
            # Pinned explicitly because the stored listing text is Chinese.
            charset='utf8mb4',
        )
        self.cursor = self.db.cursor()

    def excute_sql(self, sql, data):
        """Execute one parameterized statement and commit it.

        Args:
            sql: SQL text containing ``%s`` placeholders.
            data: sequence of values bound to the placeholders.

        Raises:
            Exception: whatever the driver raised; the transaction is rolled
                back before the exception is re-raised.
        """
        try:
            self.cursor.execute(sql, data)
            self.db.commit()
        except Exception:
            # Do not leave a half-applied transaction open on the connection.
            self.db.rollback()
            raise

    def close(self):
        """Close the cursor and the connection; safe to call more than once."""
        cursor, self.cursor = getattr(self, 'cursor', None), None
        db, self.db = getattr(self, 'db', None), None
        try:
            if cursor is not None:
                cursor.close()
        finally:
            # Release the connection even if closing the cursor failed.
            if db is not None:
                db.close()

    def __del__(self):
        # Finalizers must never raise: the object may be only partially
        # initialised, or the connection may already be gone.
        try:
            self.close()
        except Exception:
            pass
# Scrape the first three result pages of the 5i5j Huilongguan rental listings
# and store every listing (title, info, price, detail URL) in MySQL.

# Parameterized INSERT: the values are passed separately to the driver.
sql = 'insert into wuaiwujia(title,info,price,url) values(%s,%s,%s,%s)'
msq = MyMysql()
headers = {
    'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Language':'zh-CN,zh;q=0.9',
    'Connection':'keep-alive',
    'Cookie':'PHPSESSID=sce5m950d8s6j1rsolvurilqt6; domain=bj; _ga=GA1.2.458079001.1534494421; _gid=GA1.2.416906921.1534494421; yfx_c_g_u_id_10000001=_ck18081716270214385525539906157; yfx_f_l_v_t_10000001=f_t_1534494422429__r_t_1534494422429__v_t_1534494422429__r_c_0; Hm_lvt_94ed3d23572054a86ed341d64b267ec6=1534494427; _gat=1; _Jo0OQK=F470CFFFBC34B6B56771705270055E0944C01FF803A6D40EAC91C723C4D09E7306981EC7E98E891829B31B433D51F3A51012D4582E317819D88C240DD9D35088E68C57212F12283777C840763663251ADEB840763663251ADEBC088D32EA9E0F2ECA593CD374DB85252GJ1Z1Lw==; Hm_lpvt_94ed3d23572054a86ed341d64b267ec6=1534494548',
    'Host':'bj.5i5j.com',
    'Upgrade-Insecure-Requests':'1',
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
}


def _first(nodes):
    """Return the first XPath result, or None when the query matched nothing."""
    return nodes[0] if nodes else None


def _parse_listing(res, page_url):
    """Return (title, info, price, url) for one listing <li>, or None if a field is missing."""
    title_node = _first(res.xpath('./div[2]/h3/a'))
    info_text = _first(res.xpath('./div[2]/div[1]/p[1]/text()'))
    price_node = _first(res.xpath('./div[2]/div[1]/div/p/strong'))
    fang_url = _first(res.xpath('./div[2]/h3/a/@href'))
    if title_node is None or info_text is None or price_node is None or fang_url is None:
        return None
    # The href may be relative, so resolve it against the page it came from.
    return (
        title_node.text,
        info_text.replace(' ', ''),
        price_node.text,
        urljoin(page_url, fang_url),
    )


for i in range(1, 4):
    url = 'https://bj.5i5j.com/zufang/huilongguan/n%s' % i
    # headers must be passed by keyword: the second positional argument of
    # requests.get() is `params`, which would turn them into a query string.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on an HTTP error instead of parsing an error page.
    response.raise_for_status()
    html_ele = etree.HTML(response.text)
    li_list = html_ele.xpath('//ul[@class="pList"]/li') if html_ele is not None else []
    for res in li_list:
        data = _parse_listing(res, url)
        if data is None:
            # Skip <li> entries that lack the expected listing sub-nodes.
            continue
        title, info, price, fangyuan_url = data
        print(title)
        print(info)
        print(price)
        msq.excute_sql(sql, data)
    print('第%s页保存完毕'%i)
    # Pause between page fetches so the site is not hit in rapid succession.
    time.sleep(2)
# 利用 XPath 爬取 5i5j 租房信息，并保存到数据库
# 最新推荐文章于 2021-01-17 13:46:46 发布