#!/usr/bin/python
# -*- coding:utf-8 -*-
import httplib2
import urllib.request
import json
#import urllib2
import re
import os
import string
class BaiduImage(object):
    """Crawl Baidu image-search result pages and save the ``.jpg`` hits.

    Result pages are fetched as JSON through ``httplib2``; every entry's
    ``objURL`` is then downloaded into ``IMAGE_DIR`` (relative to the current
    working directory). Files that already exist are never overwritten.
    """

    # Directory (relative to the working directory) that receives the images.
    IMAGE_DIR = 'image'
    # Number of results per page (the ``rn`` parameter) and the step by which
    # the ``pn`` offset advances after each page.
    PAGE_SIZE = 60
    # Seconds to wait on a single image download before giving up.
    DOWNLOAD_TIMEOUT = 30
    # Search endpoint; the ``pn`` offset is appended to this prefix.
    SEARCH_URL = ('http://image.baidu.com/search/avatarjson'
                  '?tn=resultjsonavatarnew&ie=utf-8&word=风景&cg=girl&rn=60&pn=')
    # NOTE(review): 'test/html' looks like a typo for 'text/html'; it is kept
    # unchanged because the request has no body -- confirm before changing.
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0',
        'Content-type': 'test/html',
    }
    # Captures the file stem between the last usable '/' and '.jpg'.
    _JPG_NAME = re.compile(r'.*/(.*?)\.jpg', re.S)

    def __init__(self):
        """Announce start-up, set the initial offset and create IMAGE_DIR."""
        super().__init__()
        print(u'图片获取中,CTRL+C 退出程序...')
        self.page = 60  # current result offset sent as ``pn``
        os.makedirs(self.IMAGE_DIR, exist_ok=True)

    def request(self):
        """Fetch result pages one after another and download their images.

        The loop ends when the server answers with a non-200 status, when a
        page cannot be fetched or parsed, or when a page carries no ``imgs``
        entries. Errors are printed together with the failing URL instead of
        being raised. ``self.page`` is advanced by ``PAGE_SIZE`` after every
        successfully processed page.
        """
        conn = httplib2.Http()
        while True:
            request_url = self.SEARCH_URL + str(self.page)
            try:
                resp, content = conn.request(
                    request_url, 'GET', body=None, headers=self.HEADERS)
                if resp.status != 200:
                    # Retrying the same page in a tight loop would only
                    # hammer the server, so give up instead.
                    print('HTTP %s: %s' % (resp.status, request_url))
                    break
                imgs = json.loads(content.decode("UTF-8")).get('imgs')
            except Exception as e:
                # Boundary handler: report which page failed and stop.
                print('%s: %s' % (e, request_url))
                break
            if not imgs:
                # No more results; nothing left to page through.
                break
            self.download(imgs)
            self.page += self.PAGE_SIZE
            print(self.page)

    def download(self, data):
        """Download every entry of *data* that points at a ``.jpg`` image.

        Args:
            data: iterable of result entries (mappings) whose ``objURL`` key
                holds the full-size image URL. (``thumbURL`` and ``hoverURL``
                hold the 200px / 360px previews and are not used.)

        A failure on one entry is printed and does not abort the remaining
        entries. After each entry a ``<count>:<url>`` progress line is
        printed.
        """
        os.makedirs(self.IMAGE_DIR, exist_ok=True)
        for count, entry in enumerate(data, 1):
            url = None
            try:
                url = entry['objURL']
                print(url)
                self._save_image(url)
            except Exception as e:
                # Best effort: one broken entry must not stop the batch.
                print(e)
            finally:
                print('%d:%s' % (count, url))

    def _save_image(self, url):
        """Fetch *url* into IMAGE_DIR unless it is not a .jpg or already saved."""
        match = self._JPG_NAME.search(url)
        if match is None:
            return
        # basename() guards against path separators sneaking into the name.
        stem = os.path.basename(match.group(1))
        if not stem:
            return
        file_name = os.path.join(self.IMAGE_DIR, stem + '.jpg')
        if os.path.exists(file_name):
            print(file_name + "存在!")
            return
        # Only hit the network once we know the file is actually needed.
        with urllib.request.urlopen(url, timeout=self.DOWNLOAD_TIMEOUT) as response:
            payload = response.read()
        with open(file_name, 'wb') as f:
            f.write(payload)
if __name__ == '__main__':
    # Script entry point: build the crawler and start fetching result pages.
    crawler = BaiduImage()
    crawler.request()