imgur.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
 
from __future__ import with_statement
import sys
import os
import urllib2
from urlparse import urlparse
import random
import re
import gevent
from gevent import monkey
monkey.patch_all()
 
def get(url):
  """Open *url* and return the file-like response object.

  A fresh opener is built for every call and is also installed as the
  process-wide urllib2 default opener.

  Args:
    url: Address to open.

  Returns:
    The response object produced by the opener; callers ``read()`` it.

  Exits:
    Terminates via ``sys.exit(-1)`` after reporting on stderr when the
    resource cannot be opened (``urllib2.HTTPError`` / ``urllib2.URLError``).
  """
  opener = urllib2.build_opener()
  # ``addheaders`` is the attribute the opener actually consults; the
  # previous spelling (``add_headers``) was silently ignored.
  opener.addheaders = [('User-Agent', 'Mozilla/5.0 (compatible; imgur.py)')]
  urllib2.install_opener(opener)
  request = urllib2.Request(url)
  try:
    # The network errors are raised by open(), not by building the Request,
    # so this is the call that has to be guarded.
    return opener.open(request)
  except (urllib2.HTTPError, urllib2.URLError):
    # sys.exc_info() avoids the version-specific "except ... as/," syntax.
    sys.stderr.write('Failed to open %s: %s\n' % (url, sys.exc_info()[1]))
    sys.exit(-1)
 
def is_url(url):
  """Return True when *url* points at imgur.com or one of its subdomains.

  The check is made on the parsed hostname (lower-cased, without port or
  credentials) rather than by substring search, so hosts that merely
  contain the text "imgur.com" (e.g. ``notimgur.com``) are rejected.

  Args:
    url: Absolute URL string; a URL without a network location yields False.

  Returns:
    bool
  """
  host = urlparse(url).hostname
  if not host:
    return False
  return host == 'imgur.com' or host.endswith('.imgur.com')
 
def fetch(url):
  """Download the no-script page of the album referenced by *url*.

  The album key is taken as the third '/'-separated piece of the URL path
  (``KEY`` in ``/a/KEY``); an IndexError is raised if the path is shorter.

  Returns:
    A ``(page_body, key)`` tuple.
  """
  segments = urlparse(url).path.split('/')
  key = segments[2]
  noscript_url = 'https://imgur.com/a/%s/noscript' % key
  body = get(noscript_url).read()
  return body, key
 
def get_or_create_folder(key, folder=None):
  """Make sure the output directory exists and return its path.

  Args:
    key: Directory name used when *folder* is not supplied.
    folder: Optional explicit directory; takes precedence over *key*.

  Returns:
    The directory path that was found or created.
  """
  target = key if folder is None else folder
  if os.path.exists(target):
    return target
  os.makedirs(target)
  return target
  
def fetch_images(foldername, images):
  """Download a single image into *foldername*.

  Args:
    foldername: Existing directory the image is written to.
    images: One match tuple whose first item is the image URL and whose
      second item is the file name to save it under.
  """
  # Pause for 0 or 0.1 ms before starting the download.
  gevent.sleep(random.randint(0, 1) * 0.0001)
  source_url = images[0]
  destination = os.path.join(foldername, images[1])
  # Download first: opening the destination before the transfer would leave
  # an empty file behind whenever the request fails.
  response = get(source_url)
  try:
    payload = response.read()
  finally:
    response.close()
  with open(destination, 'wb') as img:
    img.write(payload)
  print('Done:\t%s' % source_url)
 
def save(url, folder=None):
  """Collect the image links of an album and prepare its output directory.

  Args:
    url: Album URL handed to ``fetch``.
    folder: Optional directory name; the album key is used when omitted.

  Returns:
    ``(foldername, images)`` where *images* is the list of regex match
    tuples ``(image_url, file_name, extension)`` found in the page.
  """
  page, album_key = fetch(url)
  matches = re.findall(
    r'<img src="(http\:\/\/i\.imgur\.com\/([a-zA-Z0-9]{5}\.(jpg|png|gif)))"',
    page)
  target = get_or_create_folder(album_key, folder)
  return target, matches
 
 
if __name__ == '__main__':
  # Command line: <album-url> [folder]
  if len(sys.argv) < 2:
    # Exit with a usage hint rather than an IndexError traceback.
    sys.exit('Usage: %s <imgur-album-url> [folder]' % sys.argv[0])
  url = sys.argv[1]
  # The target folder is optional; save() falls back to the album key.
  folder = sys.argv[2] if len(sys.argv) > 2 else None
  foldername, images = save(url, folder=folder)
  # One greenlet per image, then wait for all of them to finish.
  threads = [gevent.spawn(fetch_images, foldername, image) for image in images]
  gevent.joinall(threads)

 

转载于:https://www.cnblogs.com/bergus/p/4592772.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值