# 在《ICESat-2 学习笔记 9:Python 读取 ATL08 数据》的基础上,完成批量处理工作:
import icepyx as ipx
import numpy as np
import pandas as pd
import h5py
import os,json
from pprint import pprint
import dask.dataframe as dd
# put the full filepath to a data file here. You can get this in JupyterHub by navigating to the file,
# right clicking, and selecting copy path. Then you can paste the path in the quotes below.
def read_atl08(fname, bbox=None):
    """
    Split one ATL08 granule into per-beam HDF5 and CSV files.

    For each of the six ground-track groups (gt1l ... gt3r) the land-segment
    latitude, longitude, canopy_h_metrics and canopy_openness datasets are
    read, optionally restricted to a bounding box, and written next to the
    input file as ``<name>_<beam>.h5`` and ``<name>_<beam>.csv``.

    Parameters
    ----------
    fname : str
        Path to the ATL08 HDF5 file.
    bbox : sequence of four numbers, optional
        ``(lonmin, lonmax, latmin, latmax)``. Only segments inside the box
        (bounds inclusive) are kept. ``None`` or an empty sequence keeps
        every segment.

    Returns
    -------
    None
        Results are written to disk; beams that are missing from the file
        or contain no segments after filtering produce no output.
    """
    # Each beam is a group
    group = ['/gt1l', '/gt1r', '/gt2l', '/gt2r', '/gt3l', '/gt3r']

    # splitext only strips the final extension, so a '.h5' elsewhere in the
    # path is left alone and the output name can never equal the input name.
    stem = os.path.splitext(fname)[0]

    # Open the granule once instead of once per beam.
    with h5py.File(fname, 'r') as fi:
        # Loop through beams
        for g in group:
            #-----------------------------------#
            # 1) Read in data for a single beam #
            #-----------------------------------#
            # Load variables into memory (more can be added!)
            try:
                lat = fi[g + '/land_segments/latitude'][:]
                lon = fi[g + '/land_segments/longitude'][:]
                canopy_h_metrics = fi[g + '/land_segments/canopy/canopy_h_metrics'][:]
                canopy_openness = fi[g + '/land_segments/canopy/canopy_openness'][:]
            except KeyError:
                # A beam group or one of its datasets is absent from this
                # file: skip the beam rather than abort the whole granule.
                print('skip ->', fname, g, '(beam group or dataset missing)')
                continue

            #--------------------------------#
            # 2) Filter data by region       #
            #--------------------------------#
            # Select a region of interest. Testing against None/len keeps
            # this working when bbox is a NumPy array.
            if bbox is not None and len(bbox) > 0:
                lonmin, lonmax, latmin, latmax = bbox
                bbox_mask = (lon >= lonmin) & (lon <= lonmax) & \
                            (lat >= latmin) & (lat <= latmax)
                # Apply the mask to every variable so they stay aligned.
                lon = lon[bbox_mask]
                lat = lat[bbox_mask]
                canopy_h_metrics = canopy_h_metrics[bbox_mask]
                canopy_openness = canopy_openness[bbox_mask]

            # Test for no data (empty beam, or nothing inside the bbox)
            if len(canopy_h_metrics) == 0:
                continue

            beam = g[1:]  # drop the leading '/'

            # Save variables as HDF5
            ofile = stem + '_' + beam + '.h5'
            with h5py.File(ofile, 'w') as f:
                f['lon'] = lon
                f['lat'] = lat
                f['canopy_h_metrics'] = canopy_h_metrics
                f['canopy_openness'] = canopy_openness
            print('out ->', ofile)

            # Save as CSV: one column per canopy_h_metrics entry. The column
            # count is taken from the data instead of being fixed at nine.
            ofilecsv = stem + '_' + beam + '.csv'
            result = pd.DataFrame({'lon': lon, 'lat': lat})
            for i in range(canopy_h_metrics.shape[1]):
                result['canopy_h_metrics_%d' % i] = canopy_h_metrics[:, i]
            result['canopy_openness'] = canopy_openness
            result.to_csv(ofilecsv, index=False)
            print('out ->', ofilecsv)
# The argument is the directory that contains the h5 files
def readMultiH5(dir, bbox=None):
    """
    Run read_atl08 on every ``.h5`` file found under a directory tree.

    Parameters
    ----------
    dir : str
        Root directory; it is walked recursively with ``os.walk``.
        (The name shadows the builtin but is kept so existing keyword
        callers keep working.)
    bbox : sequence of four numbers, optional
        Passed unchanged to ``read_atl08`` for every file. Defaults to
        ``None``, i.e. no spatial filtering.

    Returns
    -------
    None
    """
    # read_atl08 writes its per-beam results beside the input file as
    # '<name>_gt1l.h5' ... '<name>_gt3r.h5'. Skip those so that running the
    # batch a second time does not feed the outputs back in as granules.
    beam_suffixes = tuple(
        '_gt%d%s.h5' % (track, side) for track in (1, 2, 3) for side in 'lr'
    )

    # os.walk yields (current directory, its sub-directories, its files)
    for root_dir, _sub_dirs, files in os.walk(dir):
        for file in files:
            # Require the real '.h5' extension, not just a trailing 'h5'
            if not file.endswith('.h5') or file.endswith(beam_suffixes):
                continue
            # Build the full path of the file
            file_name = os.path.join(root_dir, file)
            read_atl08(file_name, bbox)
# Directory holding the downloaded ATL08 granules
root_dir = r'G:\6.ICESat2 data\5000000951831-08'

# Run the batch conversion only when executed as a script, so that importing
# this module to reuse its functions does not start processing.
if __name__ == '__main__':
    readMultiH5(root_dir)