# 下载保存图片 (download and save images)

import pandas as pd
import re,requests,os

# HTTP request headers passed to requests.get() in get_pic(); only a
# User-Agent (a desktop Chrome 87 string) is set.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}


def get_pic(url, count=0, maxcount=10, code='utf-8'):
    """Download ``url`` and return the raw response body, retrying on failure.

    Args:
        url: Address to fetch with an HTTP GET (sent with the module-level
            ``headers`` and a 30 second timeout).
        count: Number of failed attempts already recorded; callers normally
            leave this at 0.
        maxcount: Stop retrying once the failure count reaches this value.
            At least one attempt is always made.
        code: Accepted for backward compatibility and otherwise unused: the
            function returns ``r.content`` (raw bytes), which is not affected
            by the response's text encoding.

    Returns:
        The response body as ``bytes``, or ``None`` when every attempt failed.
    """
    # Iterate instead of recursing so that ``maxcount`` stays in effect for
    # every retry (the recursive version fell back to the default limit).
    while True:
        try:
            r = requests.get(url, headers=headers, timeout=30)
            r.raise_for_status()
            return r.content
        except Exception:
            # Best-effort download: any ordinary error counts as one failed
            # attempt. KeyboardInterrupt/SystemExit are deliberately not
            # caught so the script can still be interrupted.
            count += 1
            print('第{}次访问{}失败'.format(count, url))
            if count >= maxcount:
                return None

# Load the listing CSV, keeping only the columns named in usecols.
house= pd.read_csv('bochao_choice_info.csv',usecols=['房屋地址','房间名称','海拔价','是否已租','配套设施','图片地址'])
# Disabled filter, kept for reference: rows with 是否已租 == 0 whose 配套设施 text contains '独卫'.
# house=house[house["是否已租"]==0]
# # print(house)
# print(house['配套设施'])
#
# # None values are a numeric type and cannot be searched with `in`, so convert them to str first
# house["is_duwei"] = house['配套设施'].apply(lambda x : '独卫' in f'{x}')
# house = house[house["is_duwei"] == True]
# print(house)


# Districts to process; each gets its own sub-folder of downloaded photos.
districts = ["开福区", "芙蓉区", "雨花区", "天心区", "长沙县", "岳麓区", "望城区"]

# Captures every substring that starts with "http" and ends with a "g" that is
# directly followed by "?" (presumably .jpg/.png/.jpeg URLs with a query
# string -- confirm against the CSV). Compiled once, outside the loops.
pic_url_pattern = re.compile(r'(http.*?g)\?')

for district in districts:
    root = f'./博超公寓照片/{district}/'
    # makedirs also creates the parent folder and tolerates an existing one.
    os.makedirs(root, exist_ok=True)

    # Flag the rows whose address mentions this district. Casting to str first
    # keeps a missing address from raising a TypeError during the match.
    house["is_need"] = house['房屋地址'].astype(str).str.contains(district, regex=False)
    infos = house[house["is_need"]]
    print(infos)

    # Iterate the columns directly: zipping with a 1-based range that was one
    # element short used to drop the last listing of every district.
    for address, price, pics in zip(infos['房间名称'], infos['海拔价'], infos['图片地址']):
        for num, pic_url in enumerate(pic_url_pattern.findall(str(pics)), start=1):
            data = get_pic(pic_url)
            if data is None:
                # Every retry failed; skip this picture instead of crashing
                # on f.write(None). The picture number is still consumed.
                continue
            # '/' would be read as a directory separator, so strip it.
            room_path = f'{address}{price}图片{num}.jpg'.replace('/', "")
            with open(root + room_path, 'wb') as f:
                f.write(data)
            print('{}已下载完成'.format(room_path))


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值