# Import the modules this script needs, open the MySQL connection, and create
# a cursor for running statements on that connection.
import re
import time

import pymysql
import requests
from lxml import etree

# NOTE(review): the connection runs at import time and the credentials are
# hard-coded; consider loading them from configuration or the environment.
conn = pymysql.connect(host='localhost', user='root', passwd='1234', db='mydatabase1', port=3306, charset='utf8')
cursor = conn.cursor()

# Request headers carrying a desktop-Chrome User-Agent so that requests look
# like they come from a browser rather than a script; the intent is to avoid
# failed crawls and IP bans.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}
#创建一个获取网址的函数:
def get_house_url(url):
html=requests.get(url,headers=headers)#利用头命令进行伪装访问网址
selector=etree.HTML(html.text) #解析源代码,使之成为我们需要的文本文档
house_hrefs=selector.xpath('//div[@class="house-title"]/a/@href')#获取连接for house_href inhouse_hrefs:
get_house_info(house_h