1 #-*- coding: utf-8 -*-
2 from __future__ importunicode_literals3 from HttpClient importHttpClient4 importsys,re,os5 from threading importThread6 from Queue importQueue7 from time importsleep8
q = Queue()  # queue of image-set (gallery) URLs
# NOTE(review): presumably a running count of images; confirm against the
# code that updates it.
imgCount = 0


class getRosiUrl(HttpClient):
    """First-level URL crawler class."""

    def __init__(self):
        """Initialise the page index, base listing URL and referer URL."""
        self.__pageIndex = 1
        # Base of the listing URL; __getAllPicUrl appends "<pageIndex>.html".
        self.__Url = "http://www.5442.com/tag/rosi/"
        # Passed as the second argument to self.Get() with each page URL
        # (presumably sent as the HTTP Referer -- confirm in HttpClient).
        self.__refer = 'http://www.5442.com/tag/rosi.html'
    # Put the crawled image-set URLs into the queue.
17 def __getAllPicUrl(self,pageIndex):18 realurl = self.__Url + str(pageIndex) + ".html"
19 printrealurl20 pageCode = self.Get(realurl,self.__refer)21 type =sys.getfilesystemencoding()22 #print pageCode[0:1666].decode("gb2312",'ignore').encode(type)
23 pattern = re.compile('
.*? ',re.S)24 items = re.findall(pattern,pa