Downloading files (images) in Python from a known url: source code, including crawling pages for image urls, with cookie saving

#coding=utf-8

'''
Created on 2013-7-17

@author: zinan.zhang
'''

import time
import httplib2
import urllib
from bs4 import BeautifulSoup

# Directory the downloaded images are saved to
savePath = 'F://TDDOWNLOAD//aNO.4//'

# Build a full image url from a base url and a file name
def url_xunhuan(url, name):
    return url + name

"""根据url下载文件,文件名自动从url获取"""

def gDownload(url,savePath):

#参数检查,现忽略

fileName = gGetFileName(url)

#fileName =gRandFilename('jpg')

gDownloadWithFilename(url,savePath,fileName)

"""根据url获取文件名"""

def gGetFileName(url):

if url==None: return None

if url=="" : return ""

arr=url.split("/")

return arr[len(arr)-1]
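Splitting on '/' keeps any query string or fragment attached to the name (e.g. 'photo.jpg?v=2'). A slightly more robust sketch using Python 2's urlparse module; the helper below is an addition for illustration, not part of the original script:

import urlparse

def gGetFileNameSafe(url):
    """Derive a file name from a url, dropping any query string or fragment."""
    if not url:
        return url
    # urlparse splits off the '?query' and '#fragment' parts before we take
    # the last path segment
    return urlparse.urlparse(url).path.split("/")[-1]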

"""根据url下载文件,文件名参数指定"""

def gDownloadWithFilename(url,savePath,file):

#参数检查,现忽略

try:

urlopen=urllib.URLopener()

fp = urlopen.open(url)

data = fp.read()

fp.close()

file=open(savePath + file,'w+b')

file.write(data)

print "下载成功:"+ url

file.close()

except IOError:

print "下载失败:"+ url

# Fetch a page and return its content
def getPage(url):
    userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)'
    headers = {
        'User-Agent': userAgent,
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Accept': 'text/css,*/*;q=0.1',
    }
    # ".cache" turns on httplib2's on-disk response cache
    http = httplib2.Http(".cache")
    _, content = http.request(url, 'GET', headers=headers)
    return content
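The title mentions saving cookies, but httplib2 has no built-in cookie jar: Http(".cache") only caches responses. A minimal sketch of manual cookie handling, assuming the server sets its cookie in a Set-Cookie header (this helper is an addition, not part of the original script):

def getPageWithCookie(url, cookie=None):
    """Fetch a page, replaying a saved cookie and returning the new one."""
    headers = {'User-Agent': 'Mozilla/5.0'}
    if cookie:
        headers['Cookie'] = cookie  # replay the cookie saved from a prior response
    http = httplib2.Http(".cache")
    response, content = http.request(url, 'GET', headers=headers)
    # httplib2 responses behave like dicts with lower-cased header names
    return content, response.get('set-cookie', cookie)

Thread the returned cookie into the next call to keep the same session alive across requests.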

# Loop-download a fixed list of file names (sites like wallpaper, enterdesk)
def xuanhuan_down_list():
    names = []
    url = 'https://tupian.enterdesk.com/2013/mxy/0311/4/'
    temp = 10
    for i in range(temp):
        names.append(str(i) + '.jpg')
    for i in range(temp):
        url_list = url_xunhuan(url, names[i])
        gDownload(url_list, savePath)
        time.sleep(0.2)

# Crawl a page and extract the image url
def spider_url(url):
    page = getPage(url)
    dom = BeautifulSoup(page, 'html.parser')
    srcs = [x['src'] for x in dom.findAll('img')]
    # Return the first image url found on the page
    return srcs[0]
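Returning srcs[0] assumes the wallpaper is the first <img> on the page and that its src is already absolute. A more defensive sketch; the .jpg filter and the urljoin resolution are assumptions about the target pages, not behavior taken from the original:

import urlparse

def spider_jpg_urls(url):
    """Return absolute urls of every .jpg image found on the page."""
    dom = BeautifulSoup(getPage(url), 'html.parser')
    urls = []
    for img in dom.find_all('img'):
        src = img.get('src', '')
        if src.lower().endswith('.jpg'):
            # Resolve relative src attributes against the page url
            urls.append(urlparse.urljoin(url, src))
    return urls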

# Loop-download pages whose numbering is irregular (ZOL desktop wallpapers)
def xuanhuan_down_suiji():
    try:
        temp = 25
        i = 88
        j = i
        while i <= j + temp:
            #https://desk.zol.com.cn/showpic/1920x1200_30688_33.html
            url = 'https://desk.zol.com.cn/showpic/1920x1200_12' + str(i) + '_37.html'
            src_url = spider_url(url)
            gDownload(src_url, savePath)
            time.sleep(0.1)
            i += 1
    except IOError:
        print "Failed to fetch url!"

if __name__ == "__main__":

#gDownload(url,savePath)

'''

#批量下载序号固定的图片

xuanhuan_down_list()

'''

'''

#批量下载隐藏jpg路径的文件

xuanhuan_down_suiji()

'''

#批量下载文件
