MOD35数据下载下来由于是HDF格式,因此一般的RS软件不太好处理,所以这里选择了HEG提供的python接口,现将HDF文件批量转换为TIF文件,再对TIF格式的产品进行二进制转换等操作。
1、首先是HEG批量转换的代码。这里要注意:需要先用HEG软件生成参考参数文件,并将代码中HEG的环境变量设置好,才能进行转换。
import glob
import re
from pyhdf.SD import SD
import numpy as np
import os
# HEG environment variables: tell the system where the data, TOOLKIT_MTD
# and bin directories live so the command-line tools can be executed.
os.environ['MRTDATADIR'] = r'D:\HEGTools\HEG\HEG_Win\data'
os.environ['PGSHOME'] = r'D:\HEGTools\HEG\HEG_Win\TOOLKIT_MTD'
os.environ['MRTBINDIR'] = r'D:\HEGTools\HEG\HEG_Win\bin'
# Directory containing the HEG executables
hegpath = r'D:\HEGTools\HEG\HEG_Win\bin'
# Swath subset/stitch tool (MOD35 is a swath product), used to build the run command
hegdo = os.path.join(hegpath, 'subset_stitch_swath.exe')
def mkdir(path):
    """Create directory `path` if it does not already exist.

    Leading/trailing whitespace and a trailing backslash are stripped
    from the path first.  Returns True when the directory was created,
    False when it already existed (a notice is printed in that case).
    """
    path = path.strip().rstrip("\\")
    if os.path.exists(path):
        # Directory is already there: report it and skip creation.
        print(path + ' 目录已存在')
        return False
    os.makedirs(path)
    return True
# Input data directory
inpath = r"G:\modistest\MOD35\2009"
# Directory that will hold the generated HEG parameter (.prm) files
mkdir(inpath + '\\prm_C\\')
# Output directory for the converted TIF files
outpath = r"G:\modistest\MOD35\preprocessing\step1\2009\\"
# All hdf files in the input directory, sorted so swaths of the same day
# are consecutive
allhdffiles = glob.glob(inpath + '\*.hdf')
allhdffiles.sort()


def _template_value(text, key):
    """Return the value of the first `key = value` line in a HEG template."""
    return re.findall(key + " = (.*)", text)[0]


################## read reference template parameters #########
# The template must be produced once with the HEG GUI; the per-run
# fields (input files, corners, output names) are rewritten below.
with open(r"D:\modistest\prm\prm_swathstitch", "r") as fr:
    prm = fr.read()
NUMBER_INPUTFILES = _template_value(prm, "NUMBER_INPUTFILES")  # files stitched per run
FIELD_NAME = _template_value(prm, "FIELD_NAME")
OBJECT_NAME = _template_value(prm, "OBJECT_NAME")
BAND_NUMBER = _template_value(prm, "BAND_NUMBER")
SPATIAL_SUBSET_UL_CORNER = _template_value(prm, "SPATIAL_SUBSET_UL_CORNER")
SPATIAL_SUBSET_LR_CORNER = _template_value(prm, "SPATIAL_SUBSET_LR_CORNER")
OUTPUT_OBJECT_NAME = _template_value(prm, "OUTPUT_OBJECT_NAME")
OUTGRID_X_PIXELSIZE = _template_value(prm, "OUTGRID_X_PIXELSIZE")
OUTGRID_Y_PIXELSIZE = _template_value(prm, "OUTGRID_Y_PIXELSIZE")
RESAMPLING_TYPE = _template_value(prm, "RESAMPLING_TYPE")
OUTPUT_PROJECTION_TYPE = _template_value(prm, "OUTPUT_PROJECTION_TYPE")
ELLIPSOID_CODE = _template_value(prm, "ELLIPSOID_CODE")
OUTPUT_PROJECTION_PARAMETERS = _template_value(prm, "OUTPUT_PROJECTION_PARAMETERS")
OUTPUT_FILENAME = _template_value(prm, "OUTPUT_FILENAME")
SAVE_STITCHED_FILE = _template_value(prm, "SAVE_STITCHED_FILE")
# BUG FIX: the original re-read ELLIPSOID_CODE here instead of
# OUTPUT_STITCHED_FILENAME.
OUTPUT_STITCHED_FILENAME = _template_value(prm, "OUTPUT_STITCHED_FILENAME")
OUTPUT_TYPE = _template_value(prm, "OUTPUT_TYPE")
# ########################################
#########################################################
"""
Group the input hdf files by acquisition day so that all swaths of one
day are stitched into a single output.
"""
#########################################################
import itertools


def _day_key(filepath):
    """Grouping key: first 17 characters of the basename.

    MOD35 names start with "MOD35_L2.AYYYYDDD" (product + date), so this
    prefix identifies one acquisition day.
    """
    return os.path.splitext(os.path.basename(filepath))[0][:17]


hdf_group = []   # one list of file paths per day
countlist = []   # number of swath files in each group
# allhdffiles is sorted, so files of the same day are adjacent and
# groupby yields exactly one run per day.  This also fixes two bugs in
# the original manual grouping: it silently dropped the last day's group
# and mixed indices from the unsorted os.listdir() output with the
# sorted glob list.
for _, members in itertools.groupby(allhdffiles, key=_day_key):
    group = list(members)
    hdf_group.append(group)
    countlist.append(len(group))
# For each day's group: compute the bounding box of all swaths and write
# a HEG parameter file derived from the template read above.
for i, group in enumerate(hdf_group):
    # Bounding box of the whole group in degrees.  Start from +/-inf so
    # negative latitudes/longitudes are handled correctly (the original
    # 0 / 1000 sentinels broke for the southern/western hemispheres).
    top_max = -np.inf
    right_max = -np.inf
    bottom_min = np.inf
    left_min = np.inf
    # HEG expects the input files joined with '|'
    inputfile = '|'.join(group)
    for hdffile in group:
        print(hdffile)
        hdf = SD(hdffile)
        try:
            data_lon = np.array(hdf.select('Longitude').get())
            data_lat = np.array(hdf.select('Latitude').get())
        finally:
            hdf.end()  # always release the HDF handle
        top_max = max(top_max, data_lat.max())
        right_max = max(right_max, data_lon.max())
        bottom_min = min(bottom_min, data_lat.min())
        left_min = min(left_min, data_lon.min())
    print(outpath)
    # Output name: <product>.<date>.<field>.tif
    outputfile = outpath + '.'.join(os.path.basename(group[0]).split('.')[0:2]) + '.' + FIELD_NAME.strip(
        '|') + '.tif'
    # Number of files stitched together for this day
    NUMBER_INPUTFILES = str(countlist[i])
    # Spatial extent of the mosaic, "( lat lon )"
    SPATIAL_SUBSET_UL_CORNER = '( ' + str(top_max) + ' ' + str(left_min) + ' )'  # upper-left corner
    SPATIAL_SUBSET_LR_CORNER = '( ' + str(bottom_min) + ' ' + str(right_max) + ' )'  # lower-right corner
    prm = [
        'NUM_RUNS = 1\n',
        'BEGIN\n',
        'NUMBER_INPUTFILES = ' + NUMBER_INPUTFILES + '\n',
        'INPUT_FILENAMES = ' + inputfile + '\n',
        'OBJECT_NAME = ' + OBJECT_NAME + '\n',
        'FIELD_NAME = ' + FIELD_NAME + '\n',  # dataset to convert
        'BAND_NUMBER = ' + BAND_NUMBER + '\n',
        'SPATIAL_SUBSET_UL_CORNER = ' + SPATIAL_SUBSET_UL_CORNER + '\n',
        'SPATIAL_SUBSET_LR_CORNER = ' + SPATIAL_SUBSET_LR_CORNER + '\n',
        'OUTPUT_OBJECT_NAME = ' + OUTPUT_OBJECT_NAME + '\n',
        'OUTGRID_X_PIXELSIZE = ' + OUTGRID_X_PIXELSIZE + '\n',
        'OUTGRID_Y_PIXELSIZE = ' + OUTGRID_Y_PIXELSIZE + '\n',
        'RESAMPLING_TYPE = ' + RESAMPLING_TYPE + '\n',
        'OUTPUT_PROJECTION_TYPE = ' + OUTPUT_PROJECTION_TYPE + '\n',
        'ELLIPSOID_CODE = ' + ELLIPSOID_CODE + '\n',
        'UTM_ZONE = 46\n',
        'OUTPUT_PROJECTION_PARAMETERS = ' + OUTPUT_PROJECTION_PARAMETERS + '\n',
        'OUTPUT_FILENAME = ' + outputfile + '\n',
        'SAVE_STITCHED_FILE = ' + SAVE_STITCHED_FILE + '\n',
        # BUG FIX: use the basename of the first group member; the
        # original appended a full input path to outpath, producing an
        # invalid stitched-file path.
        'OUTPUT_STITCHED_FILENAME = ' + outpath + '/' + os.path.basename(group[0]) + '_stitched.hdf\n',
        'OUTPUT_TYPE = ' + OUTPUT_TYPE + '\n',
        'END\n',
    ]
    prmfilename = inpath + "\\prm_C\\" + '.'.join(
        os.path.basename(group[0]).split('.')[0:2]) + '.' + FIELD_NAME.strip('|') + '_prm'
    print(prmfilename)
    # NOTE: HEG requires Unix line endings; with the Windows default
    # '\r\n' the tool fails to run, hence newline='\n'.
    with open(prmfilename, 'w', newline='\n') as fo:
        fo.writelines(prm)
# Collect the generated parameter files and run the HEG swath
# subset/stitch tool once per file.
prmlist = glob.glob(inpath + "\\prm_C\\" + '*_prm')
for prmfile in prmlist:
    # The executable takes its parameter file via -P.
    command = '{0} -P {1}'.format(hegdo, prmfile)
    print(command)  # show which run is executing (replaces print(1)/print(2) debug leftovers)
    os.system(command)
2、批量转换为TIF文件后,可以利用下面的代码提取MOD35数据中的云信息,这里我根据其他人的代码做了一些小改动,调整了数据读取与写入,并且支持了区域裁剪。
部分代码来自:MODIS云掩模数据读取-MOD35_傻灰的博客-CSDN博客
这里是提取了置信晴空的数据。
import numpy as np
from osgeo import gdal
import os
import glob
import rasterio
from rasterio.mask import mask
import geopandas as gpd
# Directory of TIFs produced by the HEG conversion in step 1.
input_file_dir = "G:/modistest/MOD35/preprocessing/step1/2009/"
# Destination for the decoded, clipped cloud-mask TIFs (step 2).
output_file_dir = "G:/modistest/MOD35/preprocessing/step2/2009/"
# Clip geometry passed to geopandas.read_file in clip_raster_by_shp.
# NOTE(review): this looks like a directory, not a .shp file — confirm.
input_shp = "D:/GIS_SHPFile/"
# Decode byte 1 of the MOD35 cloud-mask raster
def process_mod35_data(cloud_file):
    """Decode byte 1 of a MOD35 cloud-mask GeoTIFF.

    Returns (ancillary_data, cloud_mask, geotransform, projection):
    cloud_mask is an H x W x 5 int array with 0/1 layers
      0: determined, 1: cloudy, 2: uncertain,
      3: probably clear, 4: confident clear;
    ancillary_data is H x W x 4 with day/night (0 night, 1 day),
    sun glint, snow/ice flags (0/1) and surface type (0 water,
    1 coastal, 2 desert, 3 land).
    """
    print("Processing ", cloud_file)
    dataset = gdal.Open(cloud_file)
    geotransform = dataset.GetGeoTransform()
    projection = dataset.GetProjection()
    byte1_data = dataset.GetRasterBand(1).ReadAsArray()

    # Bit 0: cloud-mask determined (1) or not determined (0).
    determined = (byte1_data & 0b00000001).astype(int)
    # Bits 1-2: confidence level. 00 cloudy, 01 uncertain,
    # 10 probably clear, 11 confident clear.
    bits1and2 = byte1_data & 0b00000110  # masked values: 0, 2, 4, 6
    cloud_mask = np.stack([
        determined,
        (bits1and2 == 0).astype(int),  # cloudy (original used < 2, i.e. == 0)
        (bits1and2 == 2).astype(int),  # uncertain
        (bits1and2 == 4).astype(int),  # probably clear
        (bits1and2 == 6).astype(int),  # confident clear
    ], axis=2)

    # Ancillary flags.  BUG FIX: shift the masked bits down so the
    # layers hold the documented 0/1 (or 0-3) values instead of the raw
    # masked values 8/16/32 and 0/64/128/192.
    # Bit 3: day (1) / night (0)
    day_night = ((byte1_data >> 3) & 1).astype(int)
    # Bit 4: sun glint flag
    sunglint = ((byte1_data >> 4) & 1).astype(int)
    # Bit 5: snow/ice flag
    snow_ice = ((byte1_data >> 5) & 1).astype(int)
    # Bits 6-7: surface type 00 water, 01 coastal, 10 desert, 11 land
    land_water = ((byte1_data >> 6) & 0b11).astype(int)
    ancillary_data = np.stack([day_night, sunglint, snow_ice, land_water], axis=2)

    # Return everything decoded from the first byte plus georeferencing.
    return ancillary_data, cloud_mask, geotransform, projection
# Clip to the study area
def clip_raster_by_shp(input_raster, input_shp, output_raster):
    """Clip `input_raster` to the geometries in `input_shp`.

    The clipped raster is written to `output_raster` and the unclipped
    input file is deleted (callers pass the same path for input and
    output, so this effectively clips in place).
    """
    gdf = gpd.read_file(input_shp)
    with rasterio.open(input_raster) as src:
        clipped, out_transform = mask(src, gdf.geometry, crop=True)
        out_meta = src.meta.copy()
    out_meta.update({
        "driver": "GTiff",
        "height": clipped.shape[1],
        "width": clipped.shape[2],
        "transform": out_transform
    })
    # Remove the input only after the `with` block has closed the
    # dataset — Windows refuses to delete an open file.  (The original
    # called src.close() redundantly inside the context manager.)
    os.remove(input_raster)
    with rasterio.open(output_raster, "w", **out_meta) as dest:
        dest.write(clipped)
# Write the decoded data to disk
def writeimage(data, geotransform, getProjection, output_file_dir, basename):
    """Write layer 4 (confident clear) of `data` as a single-band Int16
    GeoTIFF named `basename` in `output_file_dir`, then clip it to the
    study area shapefile (`input_shp`, module-level).

    data: H x W x layers array as returned by process_mod35_data.
    geotransform / getProjection: georeferencing copied from the source.
    """
    # data.shape is (rows, cols, layers); GDAL's Create wants
    # (xsize, ysize) = (cols, rows).  The original named these
    # misleadingly (cols = shape[0]).
    height, width, bands = data.shape
    out_path = output_file_dir + basename
    driver = gdal.GetDriverByName("GTiff")
    # Single-band output: only the confident-clear layer is kept.
    dataset = driver.Create(out_path, width, height, 1, gdal.GDT_Int16)
    dataset.SetGeoTransform(geotransform)
    dataset.SetProjection(getProjection)
    dataset.GetRasterBand(1).WriteArray(data[:, :, 4])  # confident clear
    del dataset  # flush and close the file before clipping it
    # Clip in place (clip_raster_by_shp deletes the unclipped file).
    output_raster = os.path.join(output_file_dir, basename)
    clip_raster_by_shp(out_path, input_shp, output_raster)
# Driver: decode every converted TIF and write its confident-clear layer.
if not os.path.exists(input_file_dir):
    # BUG FIX: the original built the Exception object but never raised it.
    raise FileNotFoundError("input file dir does not exist.")
if not os.path.exists(output_file_dir):
    os.makedirs(output_file_dir)
    print("make dir: ", output_file_dir, ".")
for file in os.listdir(input_file_dir):
    if not file.endswith('tif'):
        continue
    path = os.path.join(input_file_dir, file)
    # Keep the first five dot-separated name components as the output name.
    basename = '.'.join(file.split(".")[0:5])
    ancillary_data, image, geotransform, getProjection = process_mod35_data(path)
    writeimage(image, geotransform, getProjection, output_file_dir, basename)