# 经历了只能整个页面截图,到可以抓取图片链接进行截图之后,发现“使用Python截图抓取厦门房地产网签数据之二”里的代码太繁杂了,于是尝试精简,以下为精简后的代码
import requests
from bs4 import BeautifulSoup
from datetime import date
from time import sleep
# Browser-like User-Agent so the target site does not reject the
# requests as coming from a script.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) chrome/87.0.4280.88 Safari/537.36 Edg/87.0.664.66'
}
def get_html():
    """Fetch the Xiamen real-estate portal home page and return it parsed.

    Returns:
        BeautifulSoup: parsed HTML of http://fdc.zfj.xm.gov.cn/Home/Index.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    # timeout added: without it a stalled connection hangs the script forever
    html = requests.get('http://fdc.zfj.xm.gov.cn/Home/Index',
                        headers=headers, timeout=30).text
    soup = BeautifulSoup(html, 'html.parser')
    return soup
# print(get_html())
# CSS class names of the three chart containers on the page:
# one-hand (new homes), net-sign, and second-hand transactions.
leixing = ['onehand onetwo','netsign onetwo','secondhand onetwo']
def address():
    """Extract the absolute URLs of the three statistics chart images.

    Navigates the page structure fetched by ``get_html()`` down to the
    turnover section, then pulls the ``<img src>`` out of each of the
    three chart containers named in ``leixing``.

    Returns:
        list[str]: absolute image URLs, in the same order as ``leixing``.
    """
    base_url = 'http://fdc.zfj.xm.gov.cn'
    pic_address = []
    # One fetch/parse for the whole page; the loop below only walks the
    # already-parsed tree, so no per-iteration delay is needed (the
    # original slept 5 s per iteration despite making no requests there).
    section = get_html().find('div', {'class': 'container'}).\
        find('div', {'style': 'width:1000px;margin:0 auto;'}).\
        find('div', {'class': 'main'}).\
        find('div', {'class': 'main_turnover clearfix'})
    for chart_class in leixing:
        src = section.find('div', {'class': chart_class}).\
            find('div', {'class': 'imgcontainer'}).find('img').get('src')
        pic_address.append(base_url + src)
    return pic_address
def get_picture():
    """Download each chart image and save it as a dated PNG file.

    Files are written to ``C:\\data\\网签备份\\`` named
    ``<today><url-fragment>.png``.
    """
    for url in address():
        # url[30:35] is a positional slice of the URL that tags the file
        # with the chart type; fragile, but kept for name compatibility.
        file_path = "C:\\data\\网签备份\\" + str(date.today()) + url[30:35] + '.png'
        image = requests.get(url, headers=headers).content
        # with-statement closes the handle; the original leaked an open
        # file per image ('w+b' also replaced by plain 'wb' for writing).
        with open(file_path, 'wb') as f:
            f.write(image)
# Script entry point: guard so importing this module does not trigger
# the download, and report failures instead of a bare "False".
if __name__ == '__main__':
    try:
        get_picture()
    except Exception as exc:
        # Narrowed from bare `except:` (which also swallowed
        # KeyboardInterrupt) and made the failure cause visible.
        print(False)
        print(exc)