喜欢的明星图片下载

最新推荐文章于 2021-07-31 22:02:59 发布

雾霾的梦想

最新推荐文章于 2021-07-31 22:02:59 发布

阅读量471

点赞数

分类专栏： request 文章标签：明星美女图片下载

本文链接：https://blog.csdn.net/qq_34568522/article/details/79092046

版权

request 专栏收录该内容

3 篇文章 0 订阅

订阅专栏

1.目前仅实现了从百度图片下载喜欢的明星的图片

环境：

python2.7

2.代码分析

a.设置搜索关键字，多个关键字之间以空格分开

b.创建文件夹保存下载的图片

c.向url地址发送requests请求

d.下载数据

e.删除某些不能打开的图片

3.代码如下：

#-*- coding:utf-8 -*-
import re
import requests
from PIL import Image
import os
#存放所有已经下载的url链接
all_url = []
#下载图片
def dowmloadPic(html,keyword,filepath,max_count=50):
    """Download the images referenced by a search-result page.

    Image addresses are taken from the ``"objURL"`` fields found in *html*.
    Addresses already recorded in the module-level ``all_url`` list are
    skipped; every newly seen address is appended to that list.

    Args:
        html: Text of the result page to scan for image addresses.
        keyword: Search keyword, used in progress messages and as the
            file-name prefix. It is concatenated with UTF-8 text below, so it
            must be a UTF-8 encoded byte string.
        filepath: Destination folder as a cp936-encoded byte string (it is
            decoded with cp936 below).
        max_count: Maximum number of images to save. The default of 50 is
            the limit that used to be hard-coded.
    """
    pic_url = re.findall(r'"objURL":"(.*?)",', html, re.S)
    saved = 0
    print('找到关键词:'+keyword+'的图片，现在开始下载图片...')
    for each in pic_url:
        if saved >= max_count:
            break
        if getrepeat(each,all_url) == 1:
            continue
        all_url.append(each)
        # The page text may yield unicode addresses; encode them explicitly so
        # that a non-ASCII address cannot raise while building the message.
        shown_url = each if isinstance(each, str) else each.encode('utf-8')
        print('正在下载第'+str(saved+1)+'张图片，图片地址:'+shown_url)
        try:
            pic = requests.get(each, timeout=30)
            # Treat HTTP error responses as failed downloads instead of
            # saving the error page under a .jpg name.
            pic.raise_for_status()
        except Exception:
            # Best effort: any failure for one address just skips that image.
            print('【错误】当前图片无法下载')
            continue
        # UTF-8 form of the target path, used for display.
        string = filepath.decode('cp936').encode('utf-8')+'//'+keyword+'_%s.jpg' % str(saved)
        print(string)
        # The file is opened through the cp936 form of the path so that
        # Chinese file names can be stored.
        # NOTE(review): presumably cp936 is the file-system code page of the
        # author's Windows machine - confirm before running elsewhere.
        target = string.decode('utf-8').encode('cp936')
        # "with" guarantees the handle is closed even if the write fails.
        with open(target,'wb') as fp:
            fp.write(pic.content)
        saved += 1
def getrepeat(url,urls):
    """Report whether an image address has already been recorded.

    Args:
        url: Image address to look up.
        urls: Collection of all image addresses downloaded so far.

    Returns:
        1 if ``url`` is present in ``urls`` (a duplicate), otherwise 0.
    """
    # Every recorded address has to be considered; returning 0 as soon as the
    # first entry differs would miss duplicates stored later in the list, and
    # an empty collection must yield 0 rather than None.
    if url in urls:
        print("重复图片")
        return 1
    return 0
#创建目录
def mkdir(path):
    """Create the directory *path*, including missing parent directories.

    Surrounding whitespace and trailing backslashes are removed from *path*
    before it is used.

    Args:
        path: Directory to create.

    Returns:
        True if the directory was created by this call, False if it already
        existed.

    Raises:
        OSError: If the directory cannot be created, for example because the
            path is empty or names an existing regular file.
    """
    path = path.strip().rstrip("\\")

    if os.path.isdir(path):
        print(path + ' 目录已存在')
        return False

    try:
        os.makedirs(path)
    except OSError:
        # The directory may have been created by someone else between the
        # check above and makedirs(); only that case counts as "exists".
        if not os.path.isdir(path):
            raise
        print(path + ' 目录已存在')
        return False

    print(path + ' 创建成功')
    return True
#删除不能打开的图片
def delcantopen(dir):
    """Delete every file under *dir* that cannot be opened as an image.

    The directory tree is walked recursively and each file is handed to
    ``Image.open``; a file for which that call raises is removed.

    Args:
        dir: Root directory to scan.
    """
    for root, dirs, files in os.walk(dir):
        for name in files:
            # Join with the directory currently being walked: plain string
            # concatenation of dir and the file name drops the path separator
            # and ignores sub-directories.
            full_path = os.path.join(root, name)
            try:
                Image.open(full_path)
            except Exception:
                # Not a bare "except": an interrupt (Ctrl-C) must not be
                # mistaken for an unreadable image and delete the file.
                os.remove(full_path)
                print('此图片不能打开，已删除')
# 创建文件夹
def createfile(path):
    """Create the folder *path*, including missing parent folders.

    Surrounding whitespace and trailing backslashes are removed from *path*
    before it is used.

    Args:
        path: Folder to create.

    Returns:
        True if the folder was created by this call, False if it already
        existed.

    Raises:
        OSError: If the folder cannot be created, for example because the
            path is empty or names an existing regular file.
    """
    # NOTE(review): near-duplicate of mkdir in this file; consider
    # consolidating the two.
    path = path.strip().rstrip("\\")

    already_there = os.path.isdir(path)
    if not already_there:
        try:
            os.makedirs(path)
        except OSError:
            # Tolerate the folder appearing between the check and
            # makedirs(); any other failure is a real error.
            if not os.path.isdir(path):
                raise
            already_there = True

    if already_there:
        print(path + ' 目录已存在')
        return False
    print(path + ' 创建成功')
    return True
if __name__ == '__main__':
    # Space-separated search keywords; one folder is created per keyword.
    string = "唐嫣 朱茵"

    words = string.split(' ')
    print(len(words))

    # Every target folder is remembered so that the cleanup pass below covers
    # all keywords instead of only the last one.
    folders = []
    for word in words:
        # The keyword is UTF-8 in this source file; the folder name is
        # converted to cp936 before it is used on disk.
        filepath = 'E://star_pic//'+word.decode('utf-8').encode('cp936')
        createfile(filepath)
        url = 'http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word='+word+'的生活'+'&ct=201326592&v=flip'
        # Same timeout as the per-image requests, so a stalled search request
        # cannot hang the script forever.
        result = requests.get(url, timeout=30)
        dowmloadPic(result.text,word,filepath)
        folders.append(filepath)

    # Cleanup runs after all downloads have finished.
    for filepath in folders:
        # NOTE(review): the folder is created from the cp936 path but scanned
        # through its UTF-8 form here - confirm which encoding the file
        # system expects.
        delcantopen(filepath.decode('cp936').encode('utf-8'))