# 对煎蛋网的图片进行批量下载,但此脚本下载会漏图(与该网站的限制有关).输出信息时可能存在文字编码问题
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# URL: http://jandan.net/ooxx/page-<N>#comments  (N ranges over 1..3000)
import multiprocessing
import os
import random
import urllib
import urllib2
from multiprocessing import Pool, Queue, cpu_count
import BeautifulSoup
import re
import requests
# r=requests.get("//ww3.sinaimg.cn/bmiddle/7c8e8afbjw1dh9yimwp4xj.jpg")
# urllib.urlopen("//ww3.sinaimg.cn/bmiddle/7c8e8afbjw1dh9yimwp4xj.jpg")
# 保存一张图片
import time
import socket
# Global socket timeout in seconds; applied process-wide so every
# urllib/urllib2/requests connection below gives up after 10s instead
# of hanging indefinitely on a stalled download.
timeout = 10
socket.setdefaulttimeout(timeout)
#获得URL并保存图片
# NOTE(review): dead code — an earlier version of saveImg, disabled by
# wrapping it in a module-level string literal (the string is evaluated
# and discarded at import time). Kept for reference only; the active
# implementation follows below. The bare `except` here silently swallowed
# all errors, which is presumably why it was replaced.
'''
def saveImg(imageURL, fileName):
try:
u = urllib2.urlopen(imageURL, timeout=10)
data = u.read()
f = open(fileName, 'wb')
f.write(data)
f.close()
except:
print u"图片地址有问题"
'''
def saveImg(imageURL,pageIndex,fileName):
try:
# 选择随机的User-Agent,以做辨别 Referer的作用
user_agent = [
'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
'Opera/9.25 (Windows NT 5.1; U; en)',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',