Scraping Anjuke Xi'an housing listings + pyecharts data visualization

Step 1 is scraping the data:

import time
import random

import xlwt
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import ChromeOptions


def main():
    baseurl = "https://xa.fang.anjuke.com/loupan/all/p"
    datalist = getData(baseurl)
    savepath = "anjuke_listings.xls"
    saveData(datalist, savepath)


def getData(baseurl):
    datalist = []
    # These request headers are kept for reference only: Selenium drives a real
    # browser, so the cookie/referer/User-Agent below are not actually sent by it.
    headers = {
        "cookie": "isp=true; isp=true; aQQ_ajkguid=C762D025-2585-194F-7317-8EC539296440; _ga=GA1.2.18092521.1626051248; _gid=GA1.2.2133028006.1626051248; id58=e87rkGDrkq+BJ/A5/JzXAg==; 58tj_uuid=4604ab87-5912-4903-a6dc-28ae7ae20bc1; als=0; isp=true; wmda_uuid=dfa952c1ee878d222eeb947c5618cfd7; wmda_new_uuid=1; wmda_visited_projects=%3B8788302075828; cmctid=483; xxzl_cid=629248c0af8e4b1e8a3219e3d1e090d7; xzuid=144b3c94-6fb0-45e0-b7f8-6e43c085f8a4; ctid=31; sessid=A07D28BD-B371-B893-463C-SX0712140406; obtain_by=2; twe=2; wmda_session_id_8788302075828=1626069849725-02cb50ff-be87-431f; init_refer=; new_uv=3; lp_lt_ut=1023adc4fcf5533ab348b520b9a4ce05; ved_loupans=472113; new_session=0",
        "referer": "https://xa.fang.anjuke.com/loupan/all/p13/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
    }
    options = ChromeOptions()
    # options.add_argument("--headless")  # uncomment to scrape without opening a window
    driver = webdriver.Chrome(options=options)  # reuse one browser for all pages
    for i in range(1, 26):  # listing pages are p1 ... p25
        url = baseurl + str(i)
        driver.get(url)
        time.sleep(random.uniform(1, 3))  # small random delay between pages
        html = driver.page_source
        soup = BeautifulSoup(html, "html.parser")
        trs = soup.select("div.key-list>.item-mod")
        print(len(trs))
        for div in trs:
            data=[]
            name = div.select(".infos>.lp-name>.items-name")[0].get_text()
            print("名字:" + name)
            data.append(name)
            tstate = div.select(".tags-wrap>.tag-panel>i:nth-of-type(1)")
            if tstate:
                state = tstate[0].get_text()
                print(state)
                data.append(state)
            else:
                data.append(" ")
            position = div.select(".address>.list-map")[0].get_text()
            print("位置:" + position)
            data.append(position)
            thuxing = div.select("div.infos > a.huxing>span:nth-of-type(1)")
            if thuxing:
                huxing = thuxing[0].get_text()
                print(huxing)
                data.append(huxing)
            else:
                data.append(" ")
            thuxing2 = div.select("div.infos > a.huxing>span:nth-of-type(2)")
            if thuxing2:  # bug fix: this block previously tested and read thuxing
                huxing2 = thuxing2[0].get_text()
                print(huxing2)
                data.append(huxing2)
            else:
                data.append(" ")
            tarea = div.select("div.infos > a.huxing > span.building-area")
            if tarea:
                area = tarea[0].get_text()
                print(area)
                data.append(area)
            else:
                data.append(" ")
            ttype = div.select(".tags-wrap>.tag-panel>.wuyetp")
            if ttype:
                wuye_type = ttype[0].get_text()  # renamed: avoid shadowing the builtin type
                print("Type: " + wuye_type)
                data.append(wuye_type)
            else:
                data.append(" ")
            tshuxing1 = div.select(".tags-wrap>.tag-panel>span:nth-of-type(1)")
            if tshuxing1:
                shuxing1 = tshuxing1[0].get_text()  # get_text must be called, not just referenced
                print(shuxing1)
                data.append(shuxing1)
            else:
                data.append(" ")
            datalist.append(data)  # collect this listing's row
    driver.quit()
    return datalist
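
The original listing breaks off here, before saveData is defined. Below is a minimal completion sketch with xlwt (which the code already imports), assuming the eight columns are written in the same order getData appends them; the English header labels and sheet name are my own choices:

def saveData(datalist, savepath):
    book = xlwt.Workbook(encoding="utf-8")
    sheet = book.add_sheet("anjuke", cell_overwrite_ok=True)
    # Header row; labels mirror the order fields are appended in getData
    col = ("Name", "Sales status", "Location", "Layout 1", "Layout 2",
           "Area", "Property type", "Attribute")
    for j, heading in enumerate(col):
        sheet.write(0, j, heading)
    # One spreadsheet row per scraped listing
    for i, data in enumerate(datalist, start=1):
        for j, value in enumerate(data):
            sheet.write(i, j, value)
    book.save(savepath)


if __name__ == "__main__":
    main()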
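Step 2, the pyecharts display promised in the title, is missing from this capture. As one hedged sketch of what it could look like, the code below reads the saved .xls back with xlrd and renders a bar chart of listing counts per district; the file name, the column index of the location field, and the "[District-Area]" format of the location string are all assumptions:

from collections import Counter

import xlrd
from pyecharts import options as opts
from pyecharts.charts import Bar


def plot_district_counts(savepath="anjuke_listings.xls"):
    sheet = xlrd.open_workbook(savepath).sheet_by_index(0)
    districts = []
    for row in range(1, sheet.nrows):  # skip the header row
        position = sheet.cell_value(row, 2)  # column 2 holds the location string
        # Location strings are assumed to look like "[ District-Area ] street",
        # so take the first segment inside the brackets as the district
        district = position.strip().lstrip("[").split("-")[0].strip()
        districts.append(district)
    counts = Counter(districts)
    bar = (
        Bar()
        .add_xaxis(list(counts.keys()))
        .add_yaxis("Listings", list(counts.values()))
        .set_global_opts(title_opts=opts.TitleOpts(title="Anjuke Xi'an listings by district"))
    )
    bar.render("anjuke_bar.html")  # writes an interactive HTML chart


plot_district_counts()

Opening anjuke_bar.html in a browser shows the interactive chart; other pyecharts chart types (Pie, Map, and so on) can be swapped in the same way.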