python 多线程下载图片_python多线程下载图片

最新推荐文章于 2024-08-26 10:29:52 发布

weixin_39736547

最新推荐文章于 2024-08-26 10:29:52 发布

阅读量120

点赞数

文章标签： python 多线程下载图片

python多线程下载图片

功能：从image.baidu.com自动翻页下载图片的python程序

用法：运行程序后，输入关键字即可

#!/usr/bin/python

# filename: getbaidupic.py

# description: get images from image.baidu.com

# author: cjcse

# version: v 0.21

import urllib

import htmllib

import formatter

import string

import os

import sys

import time

import thread

#import threading

class Parser(htmllib.HTMLParser):

#return a dictionary mapping anchor texts to lists of associated hyperlinks

def __init__(self, verbose=0):

self.anchors = {}

f = formatter.NullFormatter()

htmllib.HTMLParser.__init__(self, f, verbose)

def anchor_bgn(self, href, name, type):

self.save_bgn()

self.anchor = href

def anchor_end(self):

text = string.strip(self.save_end())

if self.anchor and text:

self.anchors[text] = self.anchors.get(text, []) + [self.anchor]

def _unique_path(path):
    """Return *path*, or a ``_N``-suffixed variant of it that does not exist.

    The counter is inserted in front of the extension, so ``a.b.jpg``
    becomes ``a.b_1.jpg`` and the file type is preserved.
    """
    if not os.path.exists(path):
        return path
    root, ext = os.path.splitext(path)
    counter = 1
    while True:
        candidate = "%s_%d%s" % (root, counter, ext)
        if not os.path.exists(candidate):
            return candidate
        counter += 1


def GetJpg(url, save_dir=None, min_size=11024):
    """Download one image and store it under a non-clashing file name.

    The file name is the last path segment of *url*.  If that name is
    already taken in the target directory, ``_1``, ``_2``, ... is inserted
    before the extension until an unused name is found.

    Args:
        url: Address of the image to fetch.
        save_dir: Directory to write into.  Defaults to the module-level
            ``save`` setting.
        min_size: Responses shorter than this many bytes are reported as
            failed and not saved.  The default keeps the original
            threshold of 11024 bytes.

    Returns:
        None.  The outcome is printed as the URL followed by a tab and
        either ``[OK]`` or ``[Failed]``.
    """
    try:
        if save_dir is None:
            save_dir = save
        target = _unique_path(os.path.join(save_dir, url.split("/")[-1]))

        response = urllib.urlopen(url)
        try:
            data = response.read()
        finally:
            response.close()

        if len(data) < min_size:
            print(url + "\t[Failed]")
            return

        with open(target, "wb") as out:
            out.write(data)
        print(url + "\t[OK]")
    except Exception:
        # Best effort: a single broken image must not stop the other
        # downloads, so the failure is only reported.
        print(url + "\t[Failed]")

def GetBaiduNextPage(url):
    """Return *url* with the paging parameters for the next result page.

    Appends ``&rn=..&pn=..&ln=..`` built from the module-level ``rn``,
    ``pn`` and ``ln`` settings, then advances the global ``pn`` offset by
    18 so that the following call addresses the page after this one.

    A paging suffix left by an earlier call is replaced rather than
    appended to, so feeding a returned URL back in never piles up
    duplicate ``rn``/``pn``/``ln`` parameters.

    Args:
        url: Search URL, with or without a paging suffix from an earlier
            call.

    Returns:
        The URL for the page at the current ``pn`` offset.
    """
    global pn
    import re  # local import: only needed to recognise an old suffix

    base = re.sub(r"(?:&rn=\d+&pn=\d+&ln=\d+)+$", "", url)
    page_url = "%s&rn=%d&pn=%d&ln=%d" % (base, rn, pn, ln)
    pn += 18
    return page_url

def _find_jpg_urls(links):
    """Yield the absolute .jpg URLs embedded in the given hyperlinks.

    Each link is split on ``&`` and ``=``; every piece that contains both
    ``http://`` and ``.jpg`` (compared case-insensitively) is yielded with
    its original spelling, because URL paths can be case-sensitive.
    """
    for link in links:
        if ".jpg" not in link.lower():
            continue
        for field in link.split("&"):
            for piece in field.split("="):
                lowered = piece.lower()
                if "http://" in lowered and ".jpg" in lowered:
                    yield piece


def GetAllJpg(url):
    """Fetch one result page and download every .jpg image it links to.

    The page is parsed with ``Parser``; each image URL found inside the
    hyperlinks is handed to ``GetJpg``.  A failure on one image does not
    stop the remaining downloads.

    Args:
        url: Address of the result page to scan.

    Raises:
        IOError: If the result page itself cannot be fetched.
    """
    page = urllib.urlopen(url)
    try:
        html = page.read()
    finally:
        page.close()

    parser = Parser()
    parser.feed(html)
    parser.close()

    for hrefs in parser.anchors.values():
        for image_url in _find_jpg_urls(hrefs):
            try:
                GetJpg(image_url)
            except Exception:
                # Best effort: skip this image and carry on with the rest.
                continue

# Runtime settings shared with the functions above.
save = "c:\\image_baidu"  # download directory, read by GetJpg
pages = 50                # number of result pages to request
rn = 21                   # "rn" query parameter sent with every paged request
pn = 18                   # "pn" offset of the next page; advanced by GetBaiduNextPage
ln = 2000                 # "ln" query parameter sent with every paged request


def main():
    """Prompt for a keyword and download the matching images page by page.

    One worker thread is started per result page.  The function returns
    once every worker has finished.
    """
    import threading  # local import: joinable threads replace the busy-wait

    print("---------------------------------------------------------------------")
    print("Description: Get images from image.baidu.com. ")
    print("Author: cjcse from CU.")
    print("version: v 0.2.")
    print("---------------------------------------------------------------------")

    keyword = raw_input("Input your keywords: ")
    while not keyword:
        keyword = raw_input("Keyword: ")

    # The keyword is percent-encoded so spaces, '&' and non-ASCII bytes
    # cannot break the query string.
    base_url = ("http://image.baidu.com/i?ct=201326592&cl=2&lm=-1"
                "&tn=baiduimage&pv=&word=" + urllib.quote(keyword) + "&z=5")

    try:
        if not os.path.exists(save):
            os.mkdir(save)
    except OSError:
        print("Failed to create directory in disk c:")
        sys.exit(1)

    print("The images will be stored in folder \"%s\"." % save)

    workers = []
    page_url = base_url  # the first page is requested without paging parameters
    for _ in range(pages):
        worker = threading.Thread(target=GetAllJpg, args=(page_url,))
        # Daemon threads let Ctrl-C end the program while downloads run.
        worker.daemon = True
        worker.start()
        workers.append(worker)
        # Always page from the clean base URL so parameters never accumulate.
        page_url = GetBaiduNextPage(base_url)

    for worker in workers:
        # Short timed joins keep the main thread responsive to Ctrl-C.
        while worker.is_alive():
            worker.join(0.5)


if __name__ == "__main__":
    main()

netmouse

207篇文章，21W+人气，0粉丝

weixin_39736547

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫