mapdownload

This post describes a Python script for batch-downloading image tiles from the web, rotating proxy IPs and User-Agent strings to avoid being blocked. The script first downloads tiles from a given URL, then reads the coordinates of the tiles already on disk to find and fetch the missing ones, and finally stitches all the tiles into one large image. This workflow is useful for crawler projects and map stitching.
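
Both scripts below assume the same on-disk naming convention: one JPEG per tile, stored under ./tiles/ and named by its grid coordinates. A tiny sketch of that convention (the tile_path helper is only illustrative and does not appear in the scripts themselves):

# Naming convention used throughout: ./tiles/<row>,<col>.jpg
def tile_path(row, col):
	return './tiles/' + str(row) + ',' + str(col) + '.jpg'

print tile_path(43036, 218480)  # -> ./tiles/43036,218480.jpg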

Download
Environment: Python 2.7


# coding=utf-8
import urllib2 as ulb
import numpy as np
import PIL.ImageFile as ImageFile
import cv2
import random
import time
import glob
import re
import ssl

ssl._create_default_https_context = ssl._create_unverified_context
 
# http://www.goubanjia.com/
proxy_list = ["124.205.153.35",
"124.205.153.39",
"60.195.62.69",
"60.194.240.6",
"106.15.194.22",
"118.144.119.81",
"124.192.87.31",
"106.14.187.182",
"124.205.153.98",
"106.14.10.139",
"221.221.220.68",
"124.205.153.56",
"124.205.153.101",
"118.144.119.92",
"121.4.82.250",
"221.122.91.65"]
 
# A collection of common User-Agent headers
my_headers = [
	"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36",
	"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14",
	"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)",
	'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
	'Opera/9.25 (Windows NT 5.1; U; en)',
	'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
	'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
	'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
	'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
	"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 Chrome/16.0.912.77 Safari/535.7",
	"Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0 "
]
 
 
# Fetch one image tile through a random proxy with a random User-Agent
def getImage(url):
	# Pause briefly (0.05 s) between requests
	t = 0.05
	time.sleep(t)
	# Randomly pick a proxy IP and a User-Agent from the lists above
	proxy = random.choice(proxy_list)
	header = random.choice(my_headers)

	print proxy, header
 
	# Build and install an opener that routes HTTP traffic through the chosen proxy
	urlhandle = ulb.ProxyHandler({'http': proxy})
	opener = ulb.build_opener(urlhandle)
	ulb.install_opener(opener)

	# Create the request for the remote image with urllib2
	response = ulb.Request(url)

	# Add a User-Agent header so the request looks like it comes from a browser
	response.add_header('User-Agent', header)

	# Open a file handle to the remote image
	fp = ulb.urlopen(response)

	# PIL incremental parser for the image data
	p = ImageFile.Parser()

	# Read the image in 1 KB chunks and feed them to the parser
	while 1:
		s = fp.read(1024)
		if not s:
			break
		p.feed(s)

	# Finish parsing and obtain the image
	im = p.close()

	# Convert the image to a numpy array (RGB)
	arr = np.array(im)

	# Swap the RGB channels to BGR so OpenCV (imshow/imwrite) handles the colors correctly
	pic = np.zeros(arr.shape, np.uint8)
	pic[:, :, 0] = arr[:, :, 2]
	pic[:, :, 1] = arr[:, :, 1]
	pic[:, :, 2] = arr[:, :, 0]
 
	return pic  # BGR array, ready for cv2.imwrite below



'''
# One-off helper (kept commented out): scan ./tiles and write the row@col list
# of already-downloaded tiles to existing_tiles.txt.
files = glob.glob('./tiles/*.jpg')

existing_tiles=[]

#print files
for item in files:
	match = re.split(r'\./tiles\\', item)
	match = re.split(r'\.', match[1])
	match = re.split(r',', match[0])
	col = match[1]
	row = match[0]
	existing_tiles.append(str(row)+'@'+str(col))
	

with open('existing_tiles.txt','w') as f:
	f.write(';'.join(existing_tiles))
	
	
'''

# Load the list of already-downloaded tiles (row@col entries separated by ';')
with open('existing_tiles.txt','r') as f:
	read_files = f.read()
existing_tiles = read_files.split(";")

print existing_tiles[0:5]
	
# Tile-coordinate bounds of the area of interest, as [row, col]
left_up = [43036,218480]
right_down = [43257,218873]
n = right_down[0]-left_up[0]
m = right_down[1]-left_up[1]

# n x m occupancy matrix: 1 = tile already downloaded, 0 = still missing
tile_matrix = [[0] * m for i in range(n)]


# Mark the tiles that are already on disk
for e in existing_tiles:
	et = e.split("@")
	try:
		tile_matrix[int(et[0])-left_up[0]][int(et[1])-left_up[1]] = 1
	except IndexError:
		print "IndexError", et
#print tile_matrix[0:5]

# Every cell still at 0 corresponds to a tile that has not been downloaded yet
missing_tiles = []
for x in range(0,n):
	for y in range(0,m):
		if tile_matrix[x][y] == 0:
			missing_tiles.append([x+left_up[0],y+left_up[1]])

			
			
maxlen = len(missing_tiles)
print maxlen

print missing_tiles[0:8]
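
# Quick illustration of the occupancy-matrix bookkeeping above, using tiny
# made-up coordinates instead of the real tile range (demo variables only,
# prefixed demo_ so they do not interfere with the rest of the script):
demo_left_up = [10, 20]
demo_right_down = [12, 23]          # a 2-row x 3-column block of tiles
demo_existing = ['10@20', '11@22']  # tiles already on disk, as row@col
demo_n = demo_right_down[0] - demo_left_up[0]
demo_m = demo_right_down[1] - demo_left_up[1]
demo_matrix = [[0] * demo_m for _ in range(demo_n)]
for de in demo_existing:
	dr, dc = de.split('@')
	demo_matrix[int(dr) - demo_left_up[0]][int(dc) - demo_left_up[1]] = 1
demo_missing = []
for dx in range(demo_n):
	for dy in range(demo_m):
		if demo_matrix[dx][dy] == 0:
			demo_missing.append([dx + demo_left_up[0], dy + demo_left_up[1]])
print demo_missing  # -> [[10, 21], [10, 22], [11, 20], [11, 21]]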


trytimes = 0


i = 278  # resume index; presumably left over from an earlier, interrupted run
while i < maxlen:
	col = missing_tiles[i][1]
	row = missing_tiles[i][0]

	print i,"max:",maxlen

	try:
		# 'url' is a placeholder kept from the original post; the real tile URL is not shown
		img = getImage('url')
	except ulb.HTTPError as e:
		if e.code == 404:
			# the tile does not exist on the server, skip it
			#with open('nulls_tiles.txt','a') as f:
				#f.write(str(row)+'@'+str(col)+';')
			i = i+1
			continue
		print e
		if trytimes < 10:
			# retry the same tile a few times before giving up
			trytimes = trytimes+1
			continue
		trytimes = 0
		i = i+1
		continue
	except ulb.URLError as e:
		print e
		i = i+1
		continue

	trytimes = 0
	#print col,row
	#cv2.imshow('image', img)
	path = './tiles/'+str(row)+','+str(col)+'.jpg'
	cv2.imwrite(path,img)
	#cv2.waitKey(0)
	i = i+1
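
The script above only runs under Python 2 (urllib2, print statements). If it has to run under Python 3, a minimal sketch of the same proxy-plus-random-User-Agent download using urllib.request might look like the following; it reuses proxy_list and my_headers from above, and get_image_py3 is just an illustrative name, not part of the original script.

# A rough Python 3 equivalent of getImage() above (urllib2 is Python 2 only).
import random
import time
import urllib.request

import numpy as np
import PIL.ImageFile as ImageFile


def get_image_py3(url):
	time.sleep(0.05)                    # brief pause between requests
	proxy = random.choice(proxy_list)   # random proxy IP
	header = random.choice(my_headers)  # random User-Agent

	# Route HTTP traffic through the chosen proxy and set the User-Agent
	handler = urllib.request.ProxyHandler({'http': proxy})
	opener = urllib.request.build_opener(handler)
	request = urllib.request.Request(url, headers={'User-Agent': header})
	fp = opener.open(request)

	# Feed the response into PIL's incremental parser, as in the original
	p = ImageFile.Parser()
	while True:
		chunk = fp.read(1024)
		if not chunk:
			break
		p.feed(chunk)
	im = p.close()

	# Convert to a numpy array and swap RGB -> BGR for OpenCV, as getImage does
	arr = np.array(im)
	return np.ascontiguousarray(arr[:, :, ::-1])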


Merge

#!/usr/bin/env python
import glob
import re
from PIL import Image
import PIL

files = glob.glob('./*.jpg')
files.sort(key=lambda x: tuple(int(i) for i in re.findall('\d+', x)[:2]))

#print(files)
imagefiles = {}

cols = []
rows = []
'''
for item in files:
	match = re.split(r'\.\\', item)
	match = re.split(r'\.', match[1])
	match = re.split(r',', match[0])
	cols.append(match[1])
	rows.append(match[0])

print cols
cols.sort()
print cols

'''

tile_matrix = []


# Build the grid of [row, col] tile coordinates to merge.
# Rows cover the full 43036-43257 range; the columns here are a sub-range of
# the full 218480-218873 extent.
for row in range(43036,43257):
	temp = []
	for col in range(218690,218873):
		temp.append([row,col])
	tile_matrix.append(temp)

	


# Each tile is 256 x 256 px, so the mosaic is (columns * 256) wide and (rows * 256) tall
total_width  = len(tile_matrix[0]) * 256
total_height = len(tile_matrix) * 256

print total_height,":",total_width

new_image = Image.new('RGB', (total_width,total_height))


for x in range(len(tile_matrix)):        # row index
	for y in range(len(tile_matrix[0])): # column index
		try:
			image = Image.open('./tiles/'+str(tile_matrix[x][y][0])+','+str(tile_matrix[x][y][1])+'.jpg')
		except Exception:
			# missing tile: use a black 256 x 256 placeholder
			image = Image.new('RGB', (256, 256), (0, 0, 0))
		new_image.paste(image,(y*256, x*256))

		
# Raise PIL's JPEG write buffer so the very large image can be saved in a single block
PIL.ImageFile.MAXBLOCK = (new_image.size[0]+256*3)*(new_image.size[1]+256*3)
new_image.save('6Tmerge.jfif', quality = 90)
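
At roughly 46,800 x 56,600 pixels the merged mosaic is awkward to open directly. If a quick visual check is enough, a small downscaled preview can be written alongside it; this is just a sketch, and the preview size and file name are arbitrary choices:

# Optional: save a downscaled preview next to the full mosaic
preview = new_image.copy()
preview.thumbnail((2048, 2048))  # shrinks in place, keeping the aspect ratio
preview.save('6Tmerge_preview.jpg', quality = 85)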

