MODIS数据批量处理之HDF多栅格文件提取特征信息(未拼接)
对于 MOD06_L2、MOD07_L2 等 Level 2 swath 产品,处理起来比较麻烦……
记录一下。。。
目前还没有对比过:先拼接栅格图像再提取所需特征信息,与先提取特征信息再拼接,哪种方式效率更高。
python+GDAL批处理方法
参考博文:python+gdal实现MODIS_HDF文件转TIF(可批量) - 知乎 (zhihu.com)
代码:
from osgeo import gdal
import numpy as np
import os
from osgeo import osr
# # gdal打开hdf数据集
# datasets = gdal.Open(r"F:\MOD06_L2\HDF\MOD06_L2.A2015001.0220.061.2017318204746.hdf")
# # 获取hdf中的子数据集
# SubDatasets = datasets.GetSubDatasets()
# # 获取子数据集的个数
# SubDatasetsNum = len(datasets.GetSubDatasets())
# # 输出各子数据集的信息
# print("子数据集一共有{0}个: ".format(SubDatasetsNum))
# for i in range(SubDatasetsNum):
# print(datasets.GetSubDatasets()[i])
# # 获取hdf中的元数据
# Metadata = datasets.GetMetadata()
# # 获取元数据的个数
# MetadataNum = len(Metadata)
# # 输出各子数据集的信息
# print("元数据一共有{0}个: ".format(MetadataNum))
# for key,value in Metadata.items():
# print('{key}:{value}'.format(key = key, value = value))
# DatasetTS = datasets.GetSubDatasets()[20][0]
# DatasetPS = datasets.GetSubDatasets()[21][0]
# RasterTS = gdal.Open(DatasetTS)
# TS = RasterTS.ReadAsArray()
# print(DatasetTS)
# print(RasterTS)
# print(TS.shape)
# 数组保存为tif
def array2raster(TifName, GeoTransform, array):
    """Write a 2-D array to a single-band Float32 GeoTIFF.

    Args:
        TifName: Path of the GeoTIFF file to create.
        GeoTransform: Sequence of the six GDAL affine coefficients
            (x origin, x pixel size, x rotation, y origin, y rotation,
            y pixel size).
        array: 2-D array; ``shape[0]`` is the row count and ``shape[1]``
            the column count.

    Raises:
        RuntimeError: If GDAL cannot create the output file.
    """
    rows, cols = array.shape[0], array.shape[1]

    driver = gdal.GetDriverByName('GTiff')
    outRaster = driver.Create(TifName, cols, rows, 1, gdal.GDT_Float32)
    # Create() returns None on failure unless GDAL exceptions are enabled
    if outRaster is None:
        raise RuntimeError("Cannot create output raster: " + str(TifName))

    outRaster.SetGeoTransform(tuple(GeoTransform))

    # EPSG:4326 is the WGS84 geographic coordinate system
    outRasterSRS = osr.SpatialReference()
    outRasterSRS.ImportFromEPSG(4326)
    outRaster.SetProjection(outRasterSRS.ExportToWkt())

    # GDAL band indices start at 1, not 0
    outband = outRaster.GetRasterBand(1)
    outband.WriteArray(array)
    outband.FlushCache()

    # Drop the band first, then the dataset, so GDAL closes the file and
    # everything is written to disk before the function returns
    outband = None
    outRaster = None
# hdf批量转tif
def _gring_values(metadata, key):
    """Parse one comma-separated GRing metadata entry into four floats."""
    return [float(value) for value in metadata[key].split(",")[:4]]


def _fit_geotransform(pixel_xy, geo_xy):
    """Fit the six GDAL affine coefficients from matching corner points.

    Solves, in the least-squares sense,
        x_geo = t0 + col * t1 + row * t2
        y_geo = t3 + col * t4 + row * t5
    and returns ``[t0, t1, t2, t3, t4, t5]``.
    """
    # One design-matrix row [1, col, row] per corner
    design = np.column_stack((np.ones(len(pixel_xy)), pixel_xy))
    coefficients = np.linalg.lstsq(design, geo_xy, rcond=None)[0]
    # Column 0 holds the x coefficients, column 1 the y coefficients
    return coefficients.T.ravel().tolist()


def hdf2tif_batch(hdfFolder, subdatasetIndex=20, outFolder=None):
    """Convert one subdataset of every HDF file in a folder to GeoTIFF.

    For each ``.hdf`` file the selected subdataset is read, an affine
    geotransform is fitted to the four GRing corner coordinates found in
    the file metadata, and the array is written through ``array2raster``.
    The geolocation is only an approximation: it is an affine fit to four
    corner points.

    Args:
        hdfFolder: Folder containing the HDF files.
        subdatasetIndex: Index into ``GetSubDatasets()`` of the layer to
            export (default 20, the index used originally).
        outFolder: Folder for the output files; ``None`` keeps the
            original behaviour of writing to the current directory.

    Raises:
        RuntimeError: If GDAL cannot open an HDF file.
    """
    for i, hdfName in enumerate(os.listdir(hdfFolder)):
        # Only HDF files are processed; accept the extension in any case
        if os.path.splitext(hdfName)[1].lower() != ".hdf":
            continue

        hdfPath = os.path.join(hdfFolder, hdfName)
        datasets = gdal.Open(hdfPath)
        if datasets is None:
            raise RuntimeError("Cannot open HDF file: " + hdfPath)
        Metadata = datasets.GetMetadata()

        # Read the requested subdataset first so the real raster size is
        # known instead of assuming a fixed 270 x 406 grid
        RasterData = gdal.Open(datasets.GetSubDatasets()[subdatasetIndex][0])
        data = RasterData.ReadAsArray()
        rows, cols = data.shape[0], data.shape[1]

        # Geographic coordinates (longitude, latitude) of the four corners
        longitudes = _gring_values(Metadata, "GRINGPOINTLONGITUDE.1")
        latitudes = _gring_values(Metadata, "GRINGPOINTLATITUDE.1")
        GeoCoordinates = np.array(list(zip(longitudes, latitudes)),
                                  dtype="float64")

        # Pixel coordinates (column, row) of the same four corners
        PixelCoordinates = np.array([[0, 0],
                                     [cols - 1, 0],
                                     [cols - 1, rows - 1],
                                     [0, rows - 1]], dtype="float64")

        GeoTransform = _fit_geotransform(PixelCoordinates, GeoCoordinates)

        # Output name: acquisition start date followed by the listing index
        date = Metadata["RANGEBEGINNINGDATE"]
        TifName = date + str(i) + ".tif"
        if outFolder is not None:
            TifName = os.path.join(outFolder, TifName)

        array2raster(TifName, GeoTransform, data)
        print(TifName, "Saved successfully!")
# Run the batch conversion only when this file is executed as a script,
# so importing the module does not start processing
if __name__ == "__main__":
    hdf2tif_batch(r"F:\MOD06_L2\2015_1")
缺点:投影信息不准确(仅用四个角点的经纬度拟合仿射变换,并直接指定为 WGS84 地理坐标)
优点:速度快
改进:需要把投影信息改一下,但是投影还挺让人头疼的
MCTK+IDL批处理方法
ENVI 版本:5.3(在 5.6 上用不了)
MCTK使用手册:MCTK使用手册(英文版和自己翻译)-CSDN博客 (感谢翻译)
参考博文:MCTK批处理MODIS L2 swath产品_modis swath type l2-CSDN博客
代码一:
;+
; MCTK_batch
;
; Batch-converts every *.hdf file in a user-selected folder with MCTK's
; convert_modis_data, writing the reprojected result for the datasets
; listed in sd_names to a second user-selected folder.
;
; Edit swath_name, sd_names and the projection settings below to match
; the product being processed.
;-
pro MCTK_batch
  compile_opt idl2

  e = envi()
  envi, /restore_base_save_files
  envi_batch_init, log_file='batch.txt'

  ; Folder holding the HDF files to process
  inpath = Dialog_pickfile(/directory, title='Select MOD files inputpath')
  if inpath eq '' then begin
    ; Dialog was cancelled: nothing to do
    envi_batch_exit
    return
  endif
  cd, inpath

  ; COUNT is used instead of N_ELEMENTS because FILE_SEARCH returns a
  ; single empty string (one element) when nothing matches
  filenames = file_search('*.hdf', count=n)
  print, n
  if n eq 0 then begin
    envi_batch_exit
    return
  endif

  ; Folder that receives the converted files
  outpath = Dialog_pickfile(/directory, title='Select MOD_Geo files outpath')
  if outpath eq '' then begin
    envi_batch_exit
    return
  endif

  ; ---- Settings shared by every file (loop-invariant) ----
  swath_name = 'mod06'
  sd_names = ['Surface_Pressure']

  ; Output projection: Albers on WGS84 (ENVI projection type 9)
  Params = [6378137.0,6356752.3,0.00000000,105.00000,0.0000000,0.00000000,25.0000000,47.000]
  name = 'Albers'
  datum = 'WGS84'
  units = envi_translate_projection_units('meters')
  output_projection = envi_proj_create(name=name,datum=datum,params=params,type=9,units=units)

  ; Output method: 0 = Standard, 1 = Reprojected, 2 = Standard and reprojected
  out_method = 1

  ; Interpolation: 0 = Nearest neighbor, 1 = Bilinear, 2 = Cubic convolution
  ; (the values 6, 7 and 8 of the original API are no longer valid)
  interpolation_method = 1

  ; Background fill value: NaN
  nan_fill = float('NaN')

  PRINT, '开始处理数据 : ', SYSTIME()

  ; Bridges are created once and reused for all files
  bridges = mctk_create_bridges()
  FOR i = 0, n-1 DO BEGIN
    filename = filenames[i]
    MODfilename = inpath+filename
    print,filename

    ; Output root name: input name without the 4-character '.hdf' suffix
    out_name = STRMID(filename, 0, strlen(filename) - 4)+'_1000mLST'

    ; Call into MCTK
    convert_modis_data,in_file=MODfilename,out_path=outpath,$
      out_root=out_name,swt_name=swath_name,sd_names=sd_names,$
      out_method=out_method,out_proj=output_projection,$
      background=nan_fill,/no_msg,bridges=bridges,interp_method=interpolation_method
  ENDFOR
  mctk_destroy_bridges, bridges

  PRINT, '处理完成 : ', SYSTIME()
  envi_batch_exit
end
更新代码二:(这个更好用一些)(2024/5/10)
;Level 2 swath example
;+
; test_batch_modis_conversion_l2_swath
;
; Converts every *.hdf file found under the input folder with MCTK's
; convert_modis_data, writing projected output for the datasets listed
; in sd_names.
;
; Values to adapt: the input folder, output_location, swath_name,
; sd_names and the output projection.
;-
pro test_batch_modis_conversion_l2_swath
  compile_opt idl2

  ; Collect the input files; COUNT gives the number of matches and is 0
  ; when nothing is found
  filearr = file_search('F:\MOD06_L2\HDF\','*.hdf',count=num)
  if num eq 0 then return

  ; The specified output location MUST end in the appropriate path
  ; separator for your OS
  output_location = 'F:\MOD06_L2\output\'

  ; Swath and datasets to extract
  swath_name = 'mod06'
  sd_names = ['Surface_Pressure','Surface_Temperature']

  ; Output method: 0 = Standard, 1 = Projected, 2 = Standard and Projected
  out_method = 1

  ; Output projection: Albers on WGS84 (ENVI projection type 9)
  Params = [6378137.0,6356752.3,0.00000000,105.00000,0.0000000,0.00000000,25.0000000,47.000]
  name = 'Albers'
  datum = 'WGS84'
  units = envi_translate_projection_units('meters')
  output_projection = envi_proj_create(name=name,datum=datum,params=params,type=9,units=units)

  ; Nearest neighbor interpolation
  interpolation_method = 0

  ; IDL FOR loops include the upper bound, so the last index is num-1
  for i=0,num-1 do begin
    modis_swath_file = filearr[i]

    ; STRTRIM removes the padding blanks STRING() adds to integers, so
    ; the output name contains no spaces
    output_rootname = 'mod06'+strtrim(i,2)

    ; NOTE: the bridges are created and destroyed on every iteration,
    ; which is slow; creating them once before the loop and destroying
    ; them after it avoids the repeated setup
    bridges = mctk_create_bridges()
    convert_modis_data, in_file=modis_swath_file, $
      out_path=output_location, out_root=output_rootname, $
      swt_name=swath_name, sd_names=sd_names, $
      out_method=out_method, out_proj=output_projection, $
      interp_method=interpolation_method, /no_msg, $
      r_fid_array=r_fid_array, r_fname_array=r_fname_array, $
      bridges=bridges, msg=msg
    mctk_destroy_bridges, bridges
  endfor
end
结果:
代码需要改的地方: swath_name 、sd_names 、filearr、output_projection
优点:投影简单,基于envi
缺点:慢
更新(2024/5/16):对上面用IDL中处理的第二个代码进行了调整,使得代码处理数据效率更高:
1、将bridge的创建和移除放到了批量循环的外面,这样不用每次循环就创建一次
2、批量处理时,ENVI+IDL 不会主动关闭已经打开的影像,文件越积越多就会出现内存不足的情况;因此在每次循环末尾调用 envi_file_mng 关闭已打开的文件来释放内存,可以根据自己的需求修改。
修改后的代码为:
;Level 2 swath example
;+
; test_batch_modis_conversion_l2_swath
;
; Converts every *.hdf file found under the input folder with MCTK's
; convert_modis_data. The bridges are created once for the whole batch,
; and the files ENVI has open are removed after each conversion so they
; do not accumulate during a long run.
;
; Values to adapt: the input folder, output_location, swath_name,
; sd_names and the output projection.
;-
pro test_batch_modis_conversion_l2_swath
  compile_opt idl2

  ; Collect the input files; COUNT gives the number of matches and is 0
  ; when nothing is found
  filearr = file_search('F:\MOD06_L2\2016\','*.hdf',count=num)
  if num eq 0 then return

  ; The specified output location MUST end in the appropriate path
  ; separator for your OS
  output_location = 'E:\TS\2016\'

  swath_name = 'mod06'
  sd_names = ['Surface_Temperature'] ; e.g. Solar_Zenith

  ; Output method: 0 = Standard, 1 = Projected, 2 = Standard and Projected
  out_method = 1

  ; Output projection: Albers on WGS84 (ENVI projection type 9).
  ; For geographic output use: envi_proj_create(/geographic)
  Params = [6378137.0,6356752.3,0.00000000,105.00000,0.0000000,0.00000000,25.0000000,47.000]
  proj_name = 'Albers'
  datum = 'WGS84'
  units = envi_translate_projection_units('Meters')
  output_projection = envi_proj_create(name=proj_name,datum=datum,params=params,type=9,units=units)

  ; Nearest neighbor interpolation
  interpolation_method = 0

  ; Bridges are created once, outside the loop
  bridges = mctk_create_bridges()

  ; IDL FOR loops include the upper bound, so the last index is num-1
  for i=0,num-1 do begin
    modis_swath_file = filearr[i]

    ; 12 characters starting at offset 10 of the base file name, e.g.
    ; '2013173.0710' for 'MOD07_L2.A2013173.0710.006.2013173195131.hdf'
    granule_stamp = strmid(file_basename(modis_swath_file),10,12)
    output_rootname = granule_stamp+'TS'

    convert_modis_data, in_file=modis_swath_file, $
      out_path=output_location, out_root=output_rootname, $
      swt_name=swath_name, sd_names=sd_names, $
      out_method=out_method, out_proj=output_projection, $
      interp_method=interpolation_method, /no_msg, $
      r_fid_array=r_fid_array, r_fname_array=r_fname_array, $
      bridges=bridges, msg=msg

    ; Remove every file ENVI currently has open to free memory.
    ; envi_get_file_ids returns -1 when no file is open.
    fids = envi_get_file_ids()
    if fids[0] ne -1 then begin
      for k=0,n_elements(fids)-1 do envi_file_mng, id=fids[k], /remove
    endif
  endfor

  mctk_destroy_bridges, bridges
end