Python 爬取图片网站并保存到 OSS
话不多说直接上代码
import datetime
import io
import random
import string
import uuid
from urllib.parse import urljoin

import oss2
import requests
from lxml import etree
def parser(img, imageName, dirpath):
    """Upload an in-memory image to an OSS bucket through ``oss2``.

    Args:
        img: Buffer holding the image bytes; whatever its ``getvalue()``
            returns is uploaded (callers in this file pass ``io.BytesIO``).
        imageName: File name of the object, placed under ``dirpath``.
        dirpath: Key prefix ("directory") inside the bucket.

    Returns:
        The ``status`` attribute of the ``put_object`` result
        (presumably the HTTP status code of the upload -- confirm against
        the oss2 documentation).
    """
    # Connection settings are placeholders; fill them in before running.
    endpoint = '<>'  # OSS endpoint URL
    access_key_id = ''
    access_key_secret = ''
    bucket_name = ''

    auth = oss2.Auth(access_key_id, access_key_secret)
    bucket = oss2.Bucket(auth, endpoint, bucket_name)

    object_key = f'{dirpath}/{imageName}'
    upload = bucket.put_object(object_key, img.getvalue())
    return upload.status
def save_oss_img(url):
    """Download one image and store it in OSS under a random name.

    The object is written to the ``img`` prefix as ``<uuid4>.jpg``.

    Args:
        url: Absolute URL of the image to fetch.

    Returns:
        The upload status returned by ``parser``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    dirpath = 'img'
    # A UUID4 keeps object names unique across runs. The extension is fixed
    # to .jpg regardless of the actual image format.
    imageName = f'{uuid.uuid4()}.jpg'

    # Without a timeout a stalled server would block the crawl forever.
    response = requests.get(url, timeout=10)
    # Fail loudly rather than uploading an HTML error page as a ".jpg".
    response.raise_for_status()

    # Hand the status back to the caller instead of discarding it.
    return parser(io.BytesIO(response.content), imageName, dirpath)
def crawling():
    """Fetch the listing page and upload every image it references to OSS.

    Image URLs are taken from the ``src`` attribute of ``<img>`` elements
    matched by the XPath below and passed to ``save_oss_img`` one by one.

    Raises:
        requests.HTTPError: If the listing page answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    # The scraping logic here is deliberately minimal; adapt the URL and the
    # XPath expression to the site you actually want to crawl.
    url = '<爬取的url>'

    # Bound the wait and reject error pages before trying to parse them.
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    html = etree.HTML(response.text)
    img_srcs = html.xpath("//div[@class='slist']/ul/li/a/img/@src")

    for src in img_srcs:
        # src attributes may be site-relative ("/uploads/a.jpg") or
        # protocol-relative ("//cdn/a.jpg"); resolve them against the page
        # URL. Absolute URLs pass through urljoin unchanged.
        save_oss_img(urljoin(url, src))
def run():
    """Entry point: run a single crawl via ``crawling``."""
    crawling()