Python学习,房价代码更新

一.知识准备

我们需要学习以下Python库,包括爬虫获取数据的requests库,读取csv文件的pandas库,解析网页的BeautifulSoup库,保存数据到csv文件的csv库,科学计算库numpy,数据可视化库matplotlib,具体库知识的学习可以在下面评论留言

import requests
import pandas as pd
from bs4 import BeautifulSoup
import csv
import numpy as np
from matplotlib.pyplot import MultipleLocator
import matplotlib.pyplot as plt

二.代码更新(加入更多数据集,爬取多个网站数据,进行可视化显示)

"""""
    作者:cpz
    目的:盐城房价分析
    版本:2.0
    时间:30/07/2019
"""""


import requests
import pandas as pd
from bs4 import BeautifulSoup
import csv
import numpy as np
from matplotlib.pyplot import MultipleLocator
import matplotlib.pyplot as plt
# Use SimHei as the sans-serif font so the Chinese plot title and tick labels
# can be rendered (assumes SimHei is installed on the machine -- TODO confirm).
plt.rcParams['font.sans-serif'] = ['SimHei']
# Render minus signs with the ASCII hyphen rather than the Unicode minus
# (presumably because the CJK font lacks that glyph -- verify).
plt.rcParams['axes.unicode_minus'] = False
def get_building_price(create_utl_list):
    """Scrape building names and listed prices from listing pages.

    Every URL is fetched with ``requests`` and parsed with BeautifulSoup
    using the ``lxml`` parser. Inside the first ``div`` with class
    ``lp-list``, each ``li`` with class ``list-item clearfix`` is treated as
    one listing: its name is the first line of the ``a.tit`` text, and its
    price is the text of the first ``em.arial`` found in the first ``div``
    with class ``other fr``.

    Args:
        create_utl_list: Iterable of page URLs to fetch.

    Returns:
        A ``(build_name_list, build_price_list)`` tuple of two parallel
        lists of strings; index ``i`` in both lists refers to the same
        listing. Prices are the raw page text and are not converted.

    Raises:
        requests.HTTPError: If a page answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    build_name_list = []
    build_price_list = []

    for url in create_utl_list:
        print(url)
        response = requests.get(url, timeout=30)
        # Surface HTTP failures directly instead of failing later with an
        # opaque IndexError while parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'lxml')

        container = soup.find('div', {'class': 'lp-list'})
        if container is None:
            # The page has no listing container, so there is nothing to
            # collect from it; report it and move on to the next page.
            print('未找到楼盘列表,跳过该页:', url)
            continue

        for item in container.find_all('li', {'class': 'list-item clearfix'}):
            name_tag = item.find('a', {'class': 'tit'})
            price_div = item.find('div', {'class': 'other fr'})
            price_tag = None
            if price_div is not None:
                price_tag = price_div.find('em', {'class': 'arial'})

            if name_tag is None or price_tag is None:
                # Incomplete listing: append to both lists or to neither so
                # the two result lists stay index-aligned.
                continue

            # Only the first line of the anchor text is the building name.
            build_name_list.append(name_tag.text.split('\n')[0])
            build_price_list.append(price_tag.text)

    return build_name_list, build_price_list

def get_clean_data(build_name_list,build_price_list):
    """Keep listings with a numeric price and save them to a CSV file.

    The names and prices are paired by position and the paired list is
    printed. Each pair whose price converts to ``int`` is written as a row
    of ``yancheng_build_price.csv`` (UTF-8, header ``BuildName,Price``) in
    the current working directory. Pairs whose price is not an integer,
    such as the placeholder '待定', are skipped instead of aborting the run.

    Args:
        build_name_list: Building names.
        build_price_list: Prices as scraped; same order as the names. If the
            two lists differ in length, the extra entries are ignored.

    Returns:
        A ``(buildName, buildPrice)`` tuple: the kept names and their prices
        as ``int``, in the original order.
    """
    pairs = list(zip(build_name_list, build_price_list))
    print(pairs)

    buildName = []
    buildPrice = []
    with open('yancheng_build_price.csv','w',encoding='utf-8',newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['BuildName', 'Price'])
        for name, raw_price in pairs:
            try:
                price = int(raw_price)
            except (TypeError, ValueError):
                # Undetermined or otherwise non-numeric price: the listing
                # cannot be plotted, so leave it out of the CSV and results.
                continue
            buildName.append(name)
            buildPrice.append(price)
            writer.writerow([name, price])

    return buildName, buildPrice


def Create_url(last_page=5):
    """Build the list of listing-page URLs to crawl.

    The first page has its own URL; pages 2 through ``last_page`` use the
    paginated URL form with the page number appended.

    Args:
        last_page: Number of the last page to include. Defaults to 5. The
            first page is always included, so any value below 2 yields only
            the first-page URL.

    Returns:
        List of URL strings, ordered from page 1 to ``last_page``.
    """
    first_page_url = 'http://danke00.com/xinfang/1261'
    paged_url_prefix = 'http://danke00.com/xinfang/1261-no-no-no-no-no-no-no-no-no/'

    url_list = [first_page_url]
    url_list.extend(paged_url_prefix + str(page) for page in range(2, last_page + 1))
    return url_list

def main():
    """Scrape the listing pages, save the priced listings, and plot them.

    Builds the URL list, scrapes names and prices, writes the listings with
    a numeric price to ``yancheng_build_price.csv``, reads that file back
    with pandas, prints a summary of the resulting DataFrame, and shows a
    line chart of price per building with each point labelled by its price.
    If no listing has a numeric price, a message is printed and no chart is
    drawn.
    """
    url_list = Create_url()
    build_name_list, build_price_list = get_building_price(url_list)
    buildName, buildPrice = get_clean_data(build_name_list, build_price_list)

    if not buildName:
        # With no data rows there is nothing numeric to plot.
        print('没有可用的房价数据,无法绘图')
        return

    build_name_price_list = pd.read_csv('yancheng_build_price.csv')
    # DataFrame.info() prints the summary itself and returns None, so it is
    # called directly rather than passed to print().
    build_name_price_list.info()

    build_name_price_list.plot(
        kind='line',
        x='BuildName',
        y='Price',
        title='盐城盐都区各在售小区房价曲线图',
        figsize=(60, 5),
        color='red',
    )

    # One tick per building, labelled with the building name.
    positions = np.arange(len(buildName))
    plt.xticks(positions, buildName)

    # Annotate every data point with its price just above the line.
    for position, price in zip(positions, buildPrice):
        plt.text(position, price, price, ha='center', va='bottom', fontsize=10)

    plt.show()

# Run the scrape / clean / plot pipeline only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()

三.总结

不多说了,该换电脑了:才爬了五六个网站,电脑就卡爆了,更别谈建立训练集了

源码:https://github.com/520zyzy/Machine-Learning/tree/master

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值