# 经历了只能整个页面截图,到可以抓取图片链接进行截图之后,发现“使用Python截图抓取厦门房地产网签数据之二”里的代码太繁杂了,于是尝试精简,以下为精简后的代码
import requests
from bs4 import BeautifulSoup
from datetime import date
from time import sleep
# Browser-like User-Agent so the target site does not reject the
# requests as coming from a script.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) chrome/87.0.4280.88 Safari/537.36 Edg/87.0.664.66'
}
def get_html():
    """Fetch the Xiamen real-estate portal home page and return it parsed.

    Returns:
        BeautifulSoup: parsed HTML of http://fdc.zfj.xm.gov.cn/Home/Index.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    # timeout added: without it a stalled connection hangs the script forever
    html = requests.get('http://fdc.zfj.xm.gov.cn/Home/Index',
                        headers=headers, timeout=30).text
    soup = BeautifulSoup(html, 'html.parser')
    return soup
# print(get_html())
# CSS class names of the three chart containers on the page:
# one-hand (new homes), net-sign, and second-hand transactions.
leixing = ['onehand onetwo','netsign onetwo','secondhand onetwo']
def address():
    """Extract the absolute URLs of the three statistics chart images.

    Navigates the page structure fetched by ``get_html()`` down to the
    turnover section, then pulls the ``<img src>`` out of each of the
    three chart containers named in ``leixing``.

    Returns:
        list[str]: absolute image URLs, in the same order as ``leixing``.
    """
    base_url = 'http://fdc.zfj.xm.gov.cn'
    pic_address = []
    # One fetch/parse for the whole page; the loop below only walks the
    # already-parsed tree, so no per-iteration delay is needed (the
    # original slept 5 s per iteration despite making no requests there).
    section = get_html().find('div', {'class': 'container'}).\
        find('div', {'style': 'width:1000px;margin:0 auto;'}).\
        find('div', {'class': 'main'}).\
        find('div', {'class': 'main_turnover clearfix'})
    for chart_class in leixing:
        src = section.find('div', {'class': chart_class}).\
            find('div', {'class': 'imgcontainer'}).find('img').get('src')
        pic_address.append(base_url + src)
    return pic_address
def get_picture():
    """Download each chart image and save it as a dated PNG file.

    Files are written to ``C:\\data\\网签备份\\`` named
    ``<today><url-fragment>.png``.
    """
    for url in address():
        # url[30:35] is a positional slice of the URL that tags the file
        # with the chart type; fragile, but kept for name compatibility.
        file_path = "C:\\data\\网签备份\\" + str(date.today()) + url[30:35] + '.png'
        image = requests.get(url, headers=headers).content
        # with-statement closes the handle; the original leaked an open
        # file per image ('w+b' also replaced by plain 'wb' for writing).
        with open(file_path, 'wb') as f:
            f.write(image)
# Script entry point: guard so importing this module does not trigger
# the download, and report failures instead of a bare "False".
if __name__ == '__main__':
    try:
        get_picture()
    except Exception as exc:
        # Narrowed from bare `except:` (which also swallowed
        # KeyboardInterrupt) and made the failure cause visible.
        print(False)
        print(exc)