# 5i5j
# XPath practice
import requests
from lxml import etree
from pymysql_1 import mysql_a
# Scrape pages 1-10 of the 5i5j Huilongguan rental listings and store one row
# per listing (title, area, address, price) through the project's MySQL helper.

# Listing-page URL template; %s is replaced by the 1-based page number.
base_url = 'https://bj.5i5j.com/zufang/huilongguan/n%s/'
pysql = mysql_a()

# Proxy map handed to requests. The proxy URL needs the "//" after the scheme
# to be a valid URL.
# NOTE(review): only the 'http' scheme is mapped, so the https:// listing
# pages fetched below do not go through this proxy — add an 'https' entry if
# proxying them is intended.
p = {
    'http': 'http://106.75.9.39:8080',
}

sql = 'insert into 5j5j(title,mianji,address,price) values(%s,%s,%s,%s)'

# The request headers are the same for every page, so build them once.
headers = {
    'Referer': 'https://bj.5i5j.com/zufang/huilongguan/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
}


def _first(nodes):
    """Return the first item of an XPath result list, or None if it is empty."""
    return nodes[0] if nodes else None


for i in range(1, 11):
    url = base_url % i
    # A timeout keeps a stalled connection from hanging the crawl forever.
    response = requests.get(url, headers=headers, proxies=p, timeout=10)
    print(response)
    html_ele = etree.HTML(response.text)
    print(html_ele)
    list_li = html_ele.xpath('//ul[@class="pList"]/li')
    for item in list_li:
        title_ele = _first(item.xpath('./div[2]/h3/a'))
        address_ele = _first(item.xpath('./div[2]/div[1]/p[2]/a'))
        price_ele = _first(item.xpath('./div[2]/div[1]/div/p[1]/strong'))
        if title_ele is None or address_ele is None or price_ele is None:
            # An <li> without the expected listing markup would otherwise
            # raise IndexError and abort the remaining pages.
            print('skipping list item with unexpected markup on %s' % url)
            continue
        # Title
        title = title_ele.text
        # Area: a text() query yields a list of strings; join them so a single
        # string (not a list) is passed as the SQL parameter.
        mianji = ''.join(item.xpath('./div[2]/div[1]/p[1]/text()'))
        # Address
        address = address_ele.text
        # Price
        price = price_ele.text
        data = (title, mianji, address, price)
        # presumably runs the parameterised INSERT and commits — confirm
        # against pymysql_1.mysql_a
        pysql.execute_modify_mysql(sql, data)