第一次看到 http://bbs.byr.cn/#!article/Python/1859?p=1&au=lc10210103 ,看他说得很容易的样子,就果断试了一试,确实很简单,代码如下:
# -*- coding: utf-8 -*-
import urllib2
import urllib
import sys
class get_mm_pic(object):
    """Crawl the Taobao MM ranking list and download each model's pictures.

    For every list page 1..page_num the crawler fetches the page, follows
    each ``<a href="http..." target=`` link to a model's home page, and saves
    every ``<img style=... src="....jpg`` picture found there into the
    current working directory as
    ``page_<page>_lady_<model>_pic_<picture>.jpg``.

    The code targets Python 2 (it relies on ``urllib2`` and
    ``urllib.urlretrieve``).
    """

    # Class-level defaults; both are overwritten per instance in __init__.
    page_num = 0
    mmurl = ""

    def __init__(self, page_num):
        """Store the crawl size and the list-page URL prefix.

        page_num -- integer number of ranking-list pages to visit.
        """
        self.page_num = page_num
        self.mmurl = "http://mm.taobao.com/json/request_top_list.htm?type=0&page="

    @staticmethod
    def _fetch(url):
        """Return the body of *url*, always closing the connection."""
        handle = urllib2.urlopen(url)
        try:
            return handle.read()
        finally:
            handle.close()

    @staticmethod
    def _extract_model_urls(cont):
        """Return the model home-page URLs found in list-page text *cont*.

        A URL is the text between ``<a href="`` (only links starting with
        ``http``) and the following ``" target="``.
        """
        ahref = '<a href="http'
        target = '" target="'
        urls = []
        pos = 0
        while True:
            head = cont.find(ahref, pos)
            if head == -1:
                break
            tail = cont.find(target, head)
            if tail == -1:
                # Unterminated anchor: stop instead of rescanning the same
                # text forever.
                break
            # Step back 4 characters so the URL keeps its leading "http".
            urls.append(cont[head + len(ahref) - 4:tail])
            pos = tail + 1
        return urls

    @staticmethod
    def _extract_pic_urls(mcont):
        """Return the ``.jpg`` picture URLs found in model-page text *mcont*.

        Each candidate runs from ``<img style=`` to the next ``.jpg``; the
        URL is whatever follows ``src="`` inside that span. Candidates
        without a ``src="`` attribute are skipped.
        """
        img_style = "<img style="
        jpg = ".jpg"
        src = 'src="'
        urls = []
        pos = 0
        while True:
            head = mcont.find(img_style, pos)
            if head == -1:
                break
            tail = mcont.find(jpg, head)
            if tail == -1:
                # No ".jpg" after this tag, so none after any later tag.
                break
            tag = mcont[head:tail + len(jpg)]
            src_at = tag.find(src)
            if src_at != -1:
                urls.append(tag[src_at + len(src):])
            # Resume scanning at the ".jpg" just consumed.
            pos = tail
        return urls

    @staticmethod
    def _download(pic_url, filename):
        """Save *pic_url* to *filename*; report failures and keep going.

        Ctrl-C ends the whole program with exit status 0.
        """
        print(">>>downloading : " + filename + ".......")
        try:
            urllib.urlretrieve(pic_url, filename)
        except KeyboardInterrupt:
            print("SIGINT, exit...")
            sys.exit(0)
        except Exception as err:
            # Best effort: one broken picture must not stop the crawl, but
            # the failure should not vanish silently either.
            sys.stderr.write("download failed for %s: %s\n" % (pic_url, err))

    def get_pic(self):
        """Download every picture from list pages 1..self.page_num.

        Returns None. Errors raised while fetching a list page or a model
        page are not caught here and propagate to the caller.
        """
        for page_count in range(1, self.page_num + 1):
            cont = self._fetch(self.mmurl + str(page_count))
            model_urls = self._extract_model_urls(cont)
            # Model and picture numbering both start at 1 and are only used
            # to build the output file name.
            for mm_count, modelurl in enumerate(model_urls, 1):
                mcont = self._fetch(modelurl)
                pic_urls = self._extract_pic_urls(mcont)
                for pic_count, mm_pic_url in enumerate(pic_urls, 1):
                    print(mm_pic_url)
                    filename = ("page_" + str(page_count)
                                + "_lady_" + str(mm_count)
                                + "_pic_" + str(pic_count) + ".jpg")
                    self._download(mm_pic_url, filename)
def main(page_num):
    """Build a crawler for *page_num* list pages and run it to completion."""
    get_mm_pic(page_num).get_pic()
if __name__ == '__main__':
    # The first command-line argument is the number of list pages to crawl.
    # A missing or non-integer argument gets a usage message rather than a
    # raw traceback.
    try:
        requested_pages = int(sys.argv[1])
    except (IndexError, ValueError):
        sys.stderr.write("usage: %s PAGE_COUNT\n" % sys.argv[0])
        sys.exit(2)
    main(requested_pages)
和拖延症战斗了半周,终于完成了这个。和男程序员交流真是件痛苦的事情呀 ><,是吧,罗瑞阳学长?