# 诸葛找房房源信息爬取 (scraper for Zhuge Zhaofang / zhugefang.com listing statistics)

import requests
from bs4 import BeautifulSoup
import pymongo
import datetime

import re 

# Sample input: a digit run followed by a letter suffix.
lg = '15001927982ttcc'

# Keep only the digits of ``lg``. The pattern is a raw string so that ``\D``
# reaches the regex engine as written instead of relying on Python passing an
# unrecognised string escape through (which newer interpreters warn about).
lgttcc = re.sub(r"\D", "", lg)


# HTTP request headers identifying the client as a desktop Chrome browser.
# The header name must be spelled "User-Agent"; a "UserAgent" key would be
# sent as an unrelated custom header and the default client UA would remain.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
}
# Subdomain codes; each one is later turned into a
# http://<code>.zhugefang.com/ URL.
# NOTE(review): 'hz' and 'sz' each occur twice - confirm whether these are
# accidental duplicates or typos for other codes.
a = (
    'bj sh dl dg gz jn jh lz qd tj '
    'xa zz zh zs cd cq cz cs fz hz heb '
    'hf hn hz km nj nc nb sz sy sz sjz wh wx yt'
).split()
def ad(i):
    """Return the zhugefang.com site URL for the subdomain code *i*.

    *i* must be a string; it is placed between ``http://`` and
    ``.zhugefang.com/`` unchanged.
    """
    return ''.join(('http://', i, '.zhugefang.com/'))


# One site URL per entry of ``a``, in the same order.
b = [ad(code) for code in a]

# Proxy mapping keyed by URL scheme; only the "http" scheme has an entry.
proxies = dict(http="http://192.168.0.103:3234")




# Imported once here rather than on every loop iteration.
import pandas

# CSS path of the statistics box that holds every figure scraped below.
_EXPONENT_BOX = ('body > div.index_content.content_1200 > div:nth-of-type(1)'
                 ' > div.zhuge_exponent_wrap > div.zhuge_exponent_box')

# Seconds to wait on a site before giving up, so that one unresponsive host
# cannot stall the whole run.
_REQUEST_TIMEOUT = 30


def _tile_selector(position, tag):
    """Return the selector for *tag* inside the *position*-th (1-based) tile."""
    return '%s > ul > li:nth-of-type(%d) > a > %s' % (_EXPONENT_BOX, position, tag)


def _first_text(soup, selector):
    """Return the stripped text of the first element matching *selector*.

    Raises:
        LookupError: if nothing in *soup* matches *selector* (for example
            because the page layout differs from what is expected).
    """
    matches = soup.select(selector)
    if not matches:
        raise LookupError('no element matches selector: ' + selector)
    return matches[0].text.strip()


# Visit every site URL in ``b`` and print the scraped figures for each.
# A site that cannot be fetched or parsed is reported and skipped so the
# remaining sites are still processed.
for k in b:
    try:
        res = requests.get(k, headers=headers, proxies=proxies,
                           timeout=_REQUEST_TIMEOUT)
        res.raise_for_status()
    except requests.RequestException as exc:
        print('skipping %s: request failed (%s)' % (k, exc))
        continue

    soup = BeautifulSoup(res.text, 'html.parser')

    try:
        area = _first_text(soup, '.banner_left')
        # Tiles 1-3: the disabled persistence code below stores the <h5>
        # values as add / cut-price / increase-price house counts.
        # NOTE(review): the <p> values look like the tiles' labels - confirm.
        xinshang = _first_text(soup, _tile_selector(1, 'p'))
        xinshang_tao = _first_text(soup, _tile_selector(1, 'h5'))
        jiangjia = _first_text(soup, _tile_selector(2, 'p'))
        jiangjia_tao = _first_text(soup, _tile_selector(2, 'h5'))
        zhangjia = _first_text(soup, _tile_selector(3, 'p'))
        zhangjia_tao = _first_text(soup, _tile_selector(3, 'h5'))
        # Text that the disabled persistence code stores, digits only, as
        # the city average price.
        junjia = _first_text(
            soup, _EXPONENT_BOX + ' > div > div.show_box > ul > li.line > p')
    except LookupError as exc:
        print('skipping %s: unexpected page layout (%s)' % (k, exc))
        continue

    # Rebuilt for every site so the indexes below always refer to this site.
    total = [area, xinshang, xinshang_tao, jiangjia, jiangjia_tao,
             zhangjia, zhangjia_tao, junjia]
    print(total)

    deal = pandas.DataFrame(total)
    print(deal)

    price = total[7]

    # Digits of the price text only (raw-string pattern, see ``re`` docs).
    pp = re.sub(r"\D", "", price)

    # Persistence is currently disabled. The original statements are kept
    # here for reference; they were wrapped in a try/except whose two
    # branches were identical, so a single copy is shown.
    #
    # now = datetime.datetime.now()
    # date = "2017-12-20"
    # client1 = pymongo.MongoClient('192.168.0.136', 27017)
    # db1 = client1.fangjia_stat
    # stat = db1.zgzf_stat
    # stat.save({"city": total[0], "add_house_count": int(total[2]),
    #            "cut_price_house_count": int(total[4]),
    #            "increase_price_houseCount": int(total[6]),
    #            "update_date": date, 'city_avg_price': int(pp),
    #            "stat_date": date, "c_date": now})

# 这里写图片描述
# 截图

# 展开阅读全文

# 没有更多推荐了,返回首页