Python 爬虫练习——爬取链家二手房信息(补充篇)

# -*- coding:utf-8 -*-
import xlwt,xlrd
from xlutils.copy import copy
from lxml import etree
import requests
import time

# Running accumulators for the district currently being scraped.
total_money = 0
total_number = 0

# URL slugs of the districts to scrape, one request series per slug.
counties = [
    "beilin",
    "weiyang",
    "baqiao",
    "xinchengqu",
    "changan4",
    "lianhu",
    "yanta",
    "gaoling1",
    "jingkai1",
    "xixian1",
    "qujiang1",
    "gaoxin5",
]

# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36',
}

# Today's local date; strftime() without a time tuple uses the current local time.
date_time = time.strftime('%Y-%m-%d')

# Output row: starts with the date, one average per district is appended later.
data_average = [date_time]

# Example of a paginated listing URL: https://xa.lianjia.com/ershoufang/beilin/pg2/
import json  # imported here so this section stays self-contained

# For every district: read the page count from the landing page, walk all
# result pages, and append the floor-divided mean of the data-price values
# (as a string) to data_average.
for county in counties:
    url = "https://xa.lianjia.com/ershoufang/" + county + "/"
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url, headers=headers, timeout=30)
    response.encoding = 'utf8'
    html = response.text
    root = etree.HTML(html)
    result = root.xpath("//div/@page-data")
    # SECURITY: this attribute comes from a remote page, so it is parsed as
    # data with json.loads instead of being executed with eval().
    # NOTE(review): presumably the attribute is JSON such as
    # {"totalPage": 100, "curPage": 1} -- confirm against a live page.
    # When no pager attribute is found, fall back to a single page.
    totalPage = json.loads(result[0])["totalPage"] if result else 1
    for page in range(1, totalPage + 1):
        url = "https://xa.lianjia.com/ershoufang/" + county + "/pg" + str(page) + "/"
        response = requests.get(url, headers=headers, timeout=30)
        response.encoding = 'utf8'
        html = response.text
        root = etree.HTML(html)
        # Alternative source: //div[@class='unitPrice']/span/text()
        # The data-price attribute is used instead, e.g. data-price="51891".
        node_list = root.xpath("//div/@data-price")
        total_number += len(node_list)
        for node in node_list:
            total_money += float(node)
    if total_number:
        average = total_money // total_number
        data_average.append(str(average))
    else:
        # No prices found for this district: record an empty cell so the
        # remaining districts keep their column positions.
        data_average.append("")
    # Reset the accumulators for the next district.
    total_money = 0
    total_number = 0
# Parenthesised form works as a statement on Python 2 and a call on Python 3.
print(data_average)

# Column headers: the date followed by one column per district, in the same
# order as the entries of data_average.
title = ["日期","碑林","未央","灞桥","新城区","长安",
        "莲湖","雁塔","高陵","经开","西咸","曲江","高新"]

# One-time bootstrap (kept for reference): run this once to create the
# workbook with the header row before the append logic below is used.
# workbook = xlwt.Workbook(encoding="utf-8")
# sheet = workbook.add_sheet('信息表')
# for i in range(len(title)):
#     sheet.write(0,i,title[i])
# for j in range(len(data_average)):
#     sheet.write(1,j,data_average[j])
# workbook.save(u"学生信息表.xls")

workbook_path = u"学生信息表.xls"

# formatting_info=True keeps the original formatting of the .xls file.
read_file = xlrd.open_workbook(workbook_path, formatting_info=True)
# Append after the last used row instead of always overwriting a fixed row,
# so each run adds a new line of averages.
next_row = read_file.sheet_by_index(0).nrows
# Copy the read-only workbook into a writable in-memory workbook.
write_data = copy(read_file)
# First sheet of the writable copy.
write_save = write_data.get_sheet(0)
# Write today's row: the date followed by the per-district averages.
for j, value in enumerate(data_average):
    write_save.write(next_row, j, value)
# Save the modified copy back over the original file.
write_data.save(workbook_path)

 

转载于:https://www.cnblogs.com/python-kp/p/11425884.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值