这是之前写的一个简单爬取妹纸图的爬虫,下面是源代码:
# -*- coding: utf-8 -*-
"""Crawler that saves the images listed on mmjpg.com "home" pages."""

import os
import shutil
import time
import urllib.request
from multiprocessing import Process

import requests
from lxml import etree

# os.chdir("meizhu")  # uncomment to switch the working directory first
print(os.getcwd())  # show the current working directory (images are saved here)

# Request headers sent with every page and image request.
headers = {"User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;"}


def use_proxy(proxy_addr="proxy_addr"):
    """Build a urllib opener that routes HTTP traffic through a proxy.

    Args:
        proxy_addr: Proxy address used for the ``http`` scheme. The default is
            the original placeholder value and has to be replaced with a real
            address before the opener can be used.

    Returns:
        The configured ``urllib.request.OpenerDirector``. The opener is not
        installed globally; pass it to ``urllib.request.install_opener`` or
        call its ``open`` method to send requests through the proxy.
    """
    proxy = urllib.request.ProxyHandler({"http": proxy_addr})
    return urllib.request.build_opener(proxy, urllib.request.HTTPHandler)


def _image_filename(img_url):
    """Return the local ``<name>.jpg`` file name derived from an image URL."""
    parts = img_url.split("/")
    # Segment 5 is the file name for URLs shaped like
    # http://host/dir/dir/name.jpg; fall back to the last segment so that
    # shorter URLs do not raise IndexError.
    segment = parts[5] if len(parts) > 5 else parts[-1]
    return segment.split(".")[0] + ".jpg"


def _download(img_url, filename):
    """Save ``img_url`` to ``filename`` using the module-level ``headers``."""
    request = urllib.request.Request(img_url, headers=headers)
    # The response is opened first, so no file is created if the request fails.
    with urllib.request.urlopen(request) as response, open(filename, "wb") as out:
        shutil.copyfileobj(response, out)


def respon(imgurl):
    """Download every image referenced on one listing page.

    The page is fetched with the module-level ``headers``, parsed with lxml,
    and each ``<img src>`` found under ``div.pic`` is saved into the current
    working directory as ``<name>.jpg``.

    Args:
        imgurl: URL of the listing page to fetch.

    Raises:
        urllib.error.URLError: If the page or one of its images cannot be
            fetched.
        UnicodeDecodeError: If the page body is not valid UTF-8.
    """
    req = urllib.request.Request(imgurl, headers=headers)
    # Close the connection as soon as the page body has been read.
    with urllib.request.urlopen(req) as page:
        html = page.read().decode("utf-8")

    selector = etree.HTML(html)
    img_urls = selector.xpath('//div[@class="pic"]/ul/li/a/img/@src')

    for img_url in img_urls:
        img_url = str(img_url)  # plain str instead of lxml's string subclass
        _download(img_url, _image_filename(img_url))

    # NOTE(review): the original indentation was lost; this assumes the
    # message is printed once per page rather than once per image.
    print("爬取妹子图完成!!!!哈哈哈")


def main():
    """Crawl listing pages 1 through 99."""
    for page in range(1, 100):
        respon("http://www.mmjpg.com/home/" + str(page))


if __name__ == "__main__":
    main()