必应壁纸下载

最新推荐文章于 2023-02-28 12:12:24 发布

qidu1998

最新推荐文章于 2023-02-28 12:12:24 发布

阅读量678

点赞数

分类专栏： python

本文链接：https://blog.csdn.net/qidu1998/article/details/78912858

版权

python 专栏收录该内容

11 篇文章 0 订阅

订阅专栏

可恶的 CSDN：那天我误删了之前写的必应壁纸下载代码，结果在回收站里无法恢复。
而且那天测试时发现必应壁纸网站做了一些小改动，原来的代码太脆弱，直接崩掉了。
前几天已经修改好，现在重新放在这里。

#!/usr/bin/python
#-*-coding:utf-8-*-
import urllib
import urllib.request
import re
import os
import time
from multiprocessing import Pool

# Module-level placeholders for the collected image URLs and image titles.
# NOTE(review): img_url_download() assigns its own local lists with these same
# names (no ``global`` statement), so these two do not appear to be read or
# written by the functions visible in this file -- confirm before relying on them.
img_down=[]
name_down=[]
# Directory the wallpapers are saved to; assigned by judge_file() via ``global``.
file_save=""

def judge_file():
    """Ask for a save directory, create it if necessary, then download the wallpapers.

    The chosen path is stored in the module-level ``file_save`` and passed to
    ``download_image``. The prompt is repeated until a usable directory is
    given. The download is attempted a bounded number of times and stops as
    soon as one attempt succeeds.

    Only ``OSError`` (path problems) and ``Exception`` (download problems) are
    handled, so ``SystemExit`` and ``KeyboardInterrupt`` still terminate the
    program instead of being swallowed.
    """
    global file_save

    while True:
        file_save = input("请输入保存文件路径，（例：D:/wallpaper）:")
        try:
            # exist_ok=True accepts an already existing directory; an empty
            # path, an invalid path, or a path that exists as a regular file
            # raises OSError and triggers a new prompt.
            os.makedirs(file_save, exist_ok=True)
        except OSError:
            print("文件路径不存在或格式错误，请重新输入！")
        else:
            break

    # Retry a few times on failure, but never loop again after a success and
    # never retry forever against a site that is down.
    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            download_image(file_save)
        except Exception as err:
            print("download failed (attempt %d/%d): %s" % (attempt, max_attempts, err))
        else:
            break

def _fetch_html(url):
    """Return the body of ``url`` decoded as UTF-8, closing the response afterwards."""
    with urllib.request.urlopen(urllib.request.Request(url)) as page:
        return page.read().decode('utf-8')


def img_url_download():
    """Collect the download URL and title of every wallpaper on bing.ioliu.cn.

    The total page count is read from the pager on the front page, then every
    page from 1 to that count (inclusive) is fetched and scanned.

    Returns:
        A tuple ``(img_down, name_down)`` of two parallel lists: absolute
        download URLs and the matching ``<h3>`` titles.

    Raises:
        ValueError: if the page count cannot be found on the front page
            (for example because the site layout changed).
        SystemExit: if the user answers ``n`` when asked whether to continue
            after a page whose URLs and titles do not pair up.
    """
    url="https://bing.ioliu.cn/"
    url_head='https://bing.ioliu.cn'
    # The capture group returns the href itself, replacing a magic slice offset.
    pattern=r'a class="ctrl download" href="(/photo/[A-z,0-9,-]{0,}[0-9]{0,}\?force=download)'
    # The site changed how the total page count is rendered; this matches the
    # current "1 / N" pager markup.
    pattern_num='<a href="/">上一页</a><span>1 / ([0-9]{0,})</span>'
    pattern_name='<h3>(.*?)</h3>'

    match_num=re.findall(pattern_num,_fetch_html(url))
    if not match_num or not match_num[0]:
        raise ValueError("could not find the total page count on %s" % url)

    print(match_num[0])
    sum_num=int(match_num[0])
    img_down=[]
    name_down=[]

    # Pages are numbered 1..sum_num, so the upper bound must be inclusive.
    for i in range(1,sum_num+1):
        html=_fetch_html("https://bing.ioliu.cn/?p="+str(i))
        time.sleep(1)  # throttle requests to the site
        match_list=re.findall(pattern,html)
        match_name=re.findall(pattern_name,html)
        print("save address data %d"%(100*i/sum_num),"%",sep="")

        # Compare per page: a cumulative comparison would flag every page after
        # the first bad one.
        if len(match_list)!=len(match_name):
            while True:
                # Ask again on every iteration so an invalid answer can be corrected.
                flag=input("程序在https://bing.ioliu.cn/?p=%d上匹配时出现了错误，是否继续？y/n\n"%i)
                if flag=='y':
                    break
                elif flag=='n':
                    raise SystemExit
                else:
                    print("输入错误，请重新输入！")
            # URLs and titles on this page cannot be paired reliably; skip the
            # page so the two returned lists stay aligned.
            continue

        img_down.extend(url_head+link for link in match_list)
        name_down.extend(match_name)
    return img_down,name_down

def _title_from_name(name):
    """Derive a file-system-safe title from a wallpaper caption.

    The caption is cut at the first full-width parenthesis if one is present,
    otherwise at the first ASCII parenthesis (typically the copyright note).
    When the caption contains a ``【tag】`` prefix, the text after the first
    ``】`` is used. Characters that are not allowed in file names are replaced
    by ``_`` and surrounding whitespace is removed.
    """
    separator='（' if '（' in name else '('
    title=name.split(separator)[0]
    if '【' in name:
        pieces=title.split('】')
        # Only drop the prefix when a closing bracket precedes the parenthesis.
        if len(pieces)>1:
            title=pieces[1]
    # Strip whitespace instead of blindly dropping the last character, so
    # captions without a trailing space or parenthesis are not truncated.
    title=re.sub(r'[\\/:*?"<>|]','_',title).strip()
    return title or 'untitled'


def download(inf):
    """Download one wallpaper.

    Args:
        inf: a ``(file_save, url, name)`` tuple -- target directory, image URL
            and image caption. The file is stored as ``<title>.jpg`` inside
            ``file_save``, where the title is derived from the caption.

    Errors raised by ``urllib.request.urlretrieve`` propagate to the caller.
    """
    file_save,url,name=inf[0],inf[1],inf[2]
    target=os.path.join(file_save,'%s.jpg'%_title_from_name(name))
    urllib.request.urlretrieve(url,target)

def download_image(file_save):
    """Collect all wallpaper URLs and download them in parallel into ``file_save``.

    Args:
        file_save: directory the images are written to.

    The URL/title lists returned by ``img_url_download`` are paired with
    ``zip``, so a length mismatch between them cannot raise ``IndexError``.
    Nothing is started when there is nothing to download.
    """
    img_down,name_down=img_url_download()
    inf=[(file_save,url,name) for url,name in zip(img_down,name_down)]
    if not inf:
        return

    # The context manager releases the worker processes even when a download
    # raises; map() has already returned every result when the block exits.
    with Pool() as pool:
        pool.map(download, inf)

if __name__=='__main__':
    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # replacement for measuring elapsed intervals.
    start = time.perf_counter()
    judge_file()
    time_cost = time.perf_counter() - start
    print("Time used:",time_cost)