import requests
import urllib.request
from bs4 import BeautifulSoup
import os
import time
def get_car_brand_url(base_url, timeout=10):
    """Collect an absolute URL for every link on the page at ``base_url``.

    The page is fetched, decoded as GB2312 (undecodable bytes are dropped)
    and each ``<a>`` tag that carries an ``href`` is prefixed with the site
    root ``https://car.autohome.com.cn``.

    Args:
        base_url: URL of the page whose links are collected.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of URL strings, in the order ``find_all`` yields the anchors.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    car_brand = 'https://car.autohome.com.cn'
    headers = {'User-Agent': 'Mozilla/5.0'}
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(base_url, headers=headers, timeout=timeout)
    # Parse the decoded text directly; re-encoding it to UTF-8 bytes would
    # only make BeautifulSoup guess the encoding a second time.
    html = response.content.decode("gb2312", "ignore")
    soup = BeautifulSoup(html, 'html.parser')
    # href=True skips anchors without an href, which would otherwise yield
    # None and break the string concatenation.
    return [car_brand + a['href'] for a in soup.find_all('a', href=True)]
def get_car_brand_class_url(car_url_list, timeout=10):
    """Collect the links inside the ``carpic-list02`` box of each given page.

    Every URL in ``car_url_list`` is fetched and decoded as GB2312. Within the
    ``<div class="uibox-con carpic-list02">`` element, each ``<a>`` tag with
    an ``href`` is prefixed with the site root and added to the result.
    Pages that do not contain that element contribute nothing.

    Args:
        car_url_list: Iterable of page URLs to scan.
        timeout: Seconds to wait for each response before giving up.

    Returns:
        A flat list of URL strings gathered from all pages.

    Raises:
        requests.RequestException: If any request fails or times out.
    """
    car_class_base = 'https://car.autohome.com.cn'
    headers = {'User-Agent': 'Mozilla/5.0'}
    car_class_list = []
    for item in car_url_list:
        response = requests.get(item, headers=headers, timeout=timeout)
        # Parse the decoded text directly instead of re-encoding it to bytes.
        html = response.content.decode("gb2312", "ignore")
        soup = BeautifulSoup(html, 'html.parser')
        container = soup.find('div', {'class': 'uibox-con carpic-list02'})
        if container is None:
            # A page without the expected box should not abort the whole run.
            continue
        # href=True filters out anchors that have no href attribute.
        car_class_list.extend(
            car_class_base + a['href']
            for a in container.find_all('a', href=True)
        )
    return car_class_list
def get_brand_class_image_url(car_class_list, timeout=10):
    """Resolve, for each given page, the first link in its ``search-pic-sortul`` list.

    Every URL in ``car_class_list`` is fetched and decoded as GB2312. The
    first ``<a>`` with an ``href`` inside ``<ul class="search-pic-sortul">``
    is prefixed with the site root and added to the result. Pages lacking
    the list, or a linked anchor inside it, are skipped.

    Args:
        car_class_list: Iterable of page URLs to scan.
        timeout: Seconds to wait for each response before giving up.

    Returns:
        A list of URL strings, at most one per input page.

    Raises:
        requests.RequestException: If any request fails or times out.
    """
    car_base = 'https://car.autohome.com.cn'
    headers = {'User-Agent': 'Mozilla/5.0'}
    car_image_url = []
    for item in car_class_list:
        response = requests.get(item, headers=headers, timeout=timeout)
        # Parse the decoded text directly instead of re-encoding it to bytes.
        html = response.content.decode("gb2312", "ignore")
        soup = BeautifulSoup(html, 'html.parser')
        sort_list = soup.find('ul', {'class': 'search-pic-sortul'})
        if sort_list is None:
            # No list on this page; skip it rather than fail on None.
            continue
        link = sort_list.find('a', href=True)
        if link is None:
            continue
        car_image_url.append(car_base + link['href'])
    return car_image_url
def download_image(car_image_url, folder_path, timeout=10):
    """Download every image listed on the given pages into ``folder_path``.

    Each URL in ``car_image_url`` is fetched and decoded as GB2312. Every
    ``<img>`` with a ``src`` inside
    ``<div class="uibox-con carpic-list03 border-b-solid">`` is downloaded and
    written as ``<n>.jpg``, where ``n`` is a counter that keeps increasing
    across all pages so later pages never overwrite earlier files. Pages
    without that element and images whose download returns an error status
    are skipped.

    Args:
        car_image_url: Iterable of page URLs to scan for images.
        folder_path: Directory the images are written to; created if missing.
        timeout: Seconds to wait for each response before giving up.

    Raises:
        requests.RequestException: If any request fails or times out.
        OSError: If the directory or an image file cannot be written.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(folder_path, exist_ok=True)
    headers = {'User-Agent': 'Mozilla/5.0'}
    # One counter for the whole run: resetting it per page would make every
    # page start again at 0.jpg and clobber the previous page's images.
    index = 0
    for item in car_image_url:
        response = requests.get(item, headers=headers, timeout=timeout)
        # Parse the decoded text directly instead of re-encoding it to bytes.
        html = response.content.decode("gb2312", "ignore")
        soup = BeautifulSoup(html, 'html.parser')
        container = soup.find('div', {'class': 'uibox-con carpic-list03 border-b-solid'})
        if container is None:
            # A page without the expected box should not abort the whole run.
            continue
        # src=True filters out <img> tags that have no src attribute.
        for img in container.find_all('img', src=True):
            src = img['src']
            # Only prepend the scheme when the src does not already carry one.
            if src.startswith(('http://', 'https://')):
                image_url = src
            else:
                image_url = 'http:' + src
            image = requests.get(image_url, headers=headers, timeout=timeout)
            if not image.ok:
                # Do not store an error page under a .jpg name.
                continue
            img_name = os.path.join(folder_path, '{}.jpg'.format(index))
            with open(img_name, 'wb') as file:
                file.write(image.content)
            print('第%d张图片下载完成' % index)
            index += 1
# Driver: each step passes the URLs it collected on to the next one, and the
# last step writes the images found on the final set of pages to disk.
folder_path = r'./car_images'
base_url = (
    'http://car.autohome.com.cn/AsLeftMenu/As_LeftListNew.ashx'
    '?typeId=2%20&brandId=0%20&fctId=0%20&seriesId=0'
)

# Links found on the left-menu page.
image_url_list = get_car_brand_url(base_url)
# Links found inside the list box of each of those pages.
car_class_list = get_car_brand_class_url(image_url_list)
# One picture-listing link per page from the previous step.
car_image_url = get_brand_class_image_url(car_class_list)

download_image(car_image_url, folder_path)
